-
Notifications
You must be signed in to change notification settings - Fork 0
/
allocation.py
1566 lines (1272 loc) · 57.1 KB
/
allocation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
"""
__author__ = 'James D. Gaboardi <jgaboardi@gmail.com>'
# standard library imports
import copy, os, time, warnings
# non-standard library imports
import geopandas as gpd
import numpy as np
from scipy.spatial import Voronoi
from shapely.geometry import Point, MultiPoint
from shapely.geometry import LineString, MultiLineString
from shapely.geometry import Polygon, MultiPolygon
from shapely.ops import polygonize
# project library imports
from . import utils
from . import spaghetti as spgh
def high_precision_id(c1, c2, c3):
    """mid level function -- build a high-precision object ID.

    Joins the polygon ID, line segment ID, and dataframe index into a
    single underscore-delimited string; designed to be applied over
    GeoSeries cells with ``numpy.vectorize``.

    Parameters
    ----------
    c1 : cell in geopandas.GeoSeries
        GEOID
    c2 : cell in geopandas.GeoSeries
        line segment id
    c3 : cell in geopandas.GeoSeries
        dataframe index

    Returns
    -------
    str
        combined GEOID_SegmID_index for pp2n or okabe
    """
    return '_'.join(str(component) for component in (c1, c2, c3))
def pp2n(segms, polys, columns=None, offset_=None, remove_segm=None,
         restrict_col=None, drop_cols=None, xyid=None, geo_col=None,
         sid_name=None, poly_key=None, poly_pop=None, proj_init=None,
         problems_writer=None, print_diags=True,
         line_pos=0.5, exhaustion_threshold=.9, pp2n_pointloc_increment=0.05,
         parallel_increment=0.5, pop_type=float, pp2n_id='pp2n_id',
         pp2n_col='pop_pp2n', mtfcc='MTFCC', len_segm='len_segm',
         len_tot='len_tot', pop_rat='pop_rat'):
    """top level function
    -- Population Polygon To Network (pp2n) --
    generate representative points near network segments
    which are assigned a proportional population weight
    based on segment length and total segment length of
    segments associated with census geographies.
    Parameters
    ----------
    segms : geopandas.GeoDataFrame
        road network segments
    polys : geopandas.GeoDataFrame
        census geographies
    columns : list
        names of columns to create in new pp2n dataframe
    offset_ : float
        distance of pp2n parallel offset from the line segment.
        Default is None.
    line_pos : float
        normalized position along the line segment at which the initial
        attempt to create the pp2n point should be conducted.
        Default is 0.5.
    exhaustion_threshold : float
        normalized position along the line segment at which the
        algorithm should terminate if no feasible points are found.
        Default is 0.9.
    pp2n_pointloc_increment : float
        if a feasible pp2n point cannot be located at the `line_pos`
        try positive and negative increments of this value until either
        the line is exhausted or feasible locations are found.
        Default is 0.05.
    parallel_increment : float
        decrease by `parallel_increment` when generating a parallel
        offset that results in a LinearRing or LINESTRING EMPTY.
        Default is 0.5.
    remove_segm : list
        segment MTFCC types to exclude from pp2n generation.
    restrict_col : str
        segment label to restrict (road segment mtfcc).
    drop_cols : list
        column names to drop following spatial join of the pp2n points
        and census geographies.
    pop_type : {float, int}
        data type of the generated pp2n population. Default is float.
    sid_name : str
        segment column name. Default is None.
    xyid : str
        combined x-coord + y-coords string ID. Default is None.
    problems_writer : str
        directory to write pickled population problems dict
    proj_init : int
        initial coordinate reference system. Default is None.
    geo_col : str
        geometry column name. Default is None.
    poly_key : str
        polygon dataframe key column name. Default is None.
    poly_pop : str
        polygon dataframe population column name. Default is None.
    pp2n_id : str
        pp2n ID column name. Default is 'pp2n_id'.
    pp2n_col : str
        pp2n population column name. Default is 'pop_pp2n'.
    mtfcc : str
        MTFCC dataframe column name. Default is 'MTFCC'.
    len_segm : str
        segment length column name. Default is 'len_segm'.
    len_tot : str
        total associated length column name. Default is 'len_tot'.
    pop_rat : str
        population ratio column name. Default is 'pop_rat'.
    print_diags : bool
        print summary diagnostics of the allocation. Default is True.
    Returns
    -------
    df : geopandas.GeoDataFrame
        generated, weighted pp2n points
    Raises
    ------
    RuntimeError
        when the summed pp2n population does not equal the original
        polygon population.
    """
    # total population before allocation -- used as the invariant check
    orig_pop = polys[poly_pop].sum()
    # list of pp2n points
    pp2n_pts = create_pp2n(segms, polys, offset_=offset_, line_pos=line_pos,
                           parallel_increment=parallel_increment,
                           exhaustion_threshold=exhaustion_threshold,
                           pp2n_pointloc_increment=pp2n_pointloc_increment,
                           remove=remove_segm, geo_col=geo_col,
                           mtfcc=mtfcc, sid_name=sid_name)
    # create pp2n dataframe
    df, orphans = create_pp2n_df(pp2n_pts, polys, segms, columns, poly_key,
                                 crs=proj_init, remove=remove_segm,
                                 drop=drop_cols, sid_name=sid_name,
                                 pp2n_col=pp2n_col, len_segm=len_segm,
                                 len_tot=len_tot, pop_rat=pop_rat,
                                 mtfcc=mtfcc, geo_col=geo_col,
                                 restrict_col=restrict_col,
                                 poly_pop=poly_pop)
    # add unique id (polygon GEOID + segment id + dataframe index)
    df[pp2n_id] = np.vectorize(high_precision_id)\
                              (df[poly_key], df[sid_name], df.index)
    # proportionalize population
    df = div_pop(df, orphans, pp2n_col=pp2n_col, poly_key=poly_key,
                 len_tot=len_tot, pop_rat=pop_rat, len_segm=len_segm,
                 poly_pop=poly_pop, as_type=pop_type, offset_=offset_,
                 problems_writer=problems_writer)
    est_pop = df[pp2n_col].sum()
    # check that original population is equivalent to pp2n population
    if not np.isclose([orig_pop], [est_pop]):
        # BUG FIX: the previous message concatenated a string with a
        # numeric value ('...' + orig_pop), which raised a TypeError
        # instead of the intended RuntimeError
        raise RuntimeError(
            'Estimated and original population not equal. '
            '%s != %s' % (orig_pop, est_pop))
    n_orphans = len(orphans) if orphans else 0
    if print_diags:
        print('\t\t\t\tOriginal population:\t', orig_pop)
        print('\t\t\t\tEstimated population:\t', est_pop)
        print('\t\t\t\tOrphans:\t\t', n_orphans)
    # drop zero-population points before writing out the frame
    df = df[df[pp2n_col] > 0.0]
    df.reset_index(inplace=True, drop=True)
    # set xyID
    node2xyid = utils.generate_xyid(df=df, geom_type='node', geo_col=geo_col)
    df = utils.fill_frame(df, idx='index', col=xyid, data=node2xyid)
    return df
def create_pp2n(segms, polys, offset_=None, line_pos=None,
                exhaustion_threshold=None, pp2n_pointloc_increment=None,
                parallel_increment=None, remove=None, geo_col=None,
                mtfcc=None, sid_name=None):
    """mid level function -- create a list of feasible pp2n points
    in the form -- [[idx1, point1], [idx1, point2], ...]

    For each segment (excluding `remove` MTFCC types) two parallel
    offset lines are generated; a representative point is interpolated
    along each offset and kept only if it intersects the unioned
    census geographies clipped to the segment's buffered envelope.

    Parameters
    ----------
    segms : see pp2n()
    polys : see pp2n()
    offset_ : see pp2n()
    line_pos : see pp2n()
    exhaustion_threshold : see pp2n()
    pp2n_pointloc_increment : see pp2n()
    parallel_increment : see pp2n()
    remove : see `remove_segm` in pp2n()
    geo_col : see pp2n()
    mtfcc : see pp2n()
    sid_name : see pp2n()

    Returns
    -------
    pp2ns : list
        generated pp2n points and associated segment ids
    """
    # segment count
    n_segms = segms.shape[0]
    # unary union of all census geographies in the study area
    union_polys = create_valid_unary(polys.unary_union)
    # list to store pp2n points
    pp2ns = []
    # iterate over each segment in the set
    for idx in range(n_segms):
        # skip segment types for which no parallel offset
        # should be attempted (excluded MTFCC codes)
        if segms.loc[idx, mtfcc] in remove:
            continue
        # line of interest
        loi = segms.loc[idx, geo_col]
        # create a buffered offset from line-of-interest envelope
        loi_buffer = loi.envelope.buffer(offset_)
        # then extract the intersection -- the local clip of the study
        # area used for the point-in-polygon feasibility tests below
        local_poly = union_polys.intersection(loi_buffer)
        # prevent shapely from rearranging the line coords
        # (as in the case when a creating a new linear ring)
        is_ring = False
        if loi.is_ring:
            is_ring = True
        # create two parallel offsets of the `loi`
        offset_lines = gen_offsets(loi, offset_, for_ring=is_ring,
                                   parallel_increment=parallel_increment)
        if not offset_lines:
            seg_id = segms.loc[idx, sid_name]
            warn_msg = '\t\tno parallel lines generated for %s' % seg_id
            warnings.warn(warn_msg)
            continue
        determining_pp2n = True
        # iterate over the two offset lines until (1) two points
        # have been generated that intersect with the unioned
        # geographies study area; or (2) the line length is
        # exhausted.
        while determining_pp2n:
            segm_pp2n_pts = []
            for ofs_idx, ofl in enumerate(offset_lines):
                fixing_loc = True
                pp2n_pointloc = line_pos
                while fixing_loc:
                    is_midpoint = False
                    # if the generated pp2n point is at the center
                    # of the line, record it and proceed
                    if pp2n_pointloc == line_pos:
                        # interpolate pp2n point along the line
                        pp2n_point = ofl.interpolate(pp2n_pointloc,
                                                     normalized=True)
                        # if the point is within the single, unioned
                        # polygons object record it
                        if pp2n_point.intersects(local_poly):
                            segm_pp2n_pts.append(pp2n_point)
                            is_midpoint = True
                            # once two pp2n points have been located
                            # break out of the while loop
                            if len(segm_pp2n_pts) == 2:
                                determining_pp2n = False
                            # once a viable point has been located
                            # break out of the location fixer
                            fixing_loc = False
                    # if the generated pp2n point is not at the
                    # center of the line, iterate in +/- .05
                    # increments until an intersecting point is
                    # found or the line is exhausted
                    if not is_midpoint:
                        while pp2n_pointloc < exhaustion_threshold\
                        and fixing_loc:
                            pp2n_pointloc += pp2n_pointloc_increment
                            # test symmetric positions on both sides
                            # of `line_pos`
                            increments = [pp2n_pointloc*-1.,
                                          pp2n_pointloc]
                            for incr in increments:
                                # interpolate pp2n point along the line
                                pp2n_point = ofl.interpolate(incr,
                                                             normalized=True)
                                if pp2n_point.intersects(local_poly):
                                    segm_pp2n_pts.append(pp2n_point)
                                    # once a viable point has been
                                    # located break out of the
                                    # location fixer
                                    fixing_loc = False
                                    # once two pp2n points have been located
                                    # break out of the while loop
                                    if len(segm_pp2n_pts) == 2:
                                        determining_pp2n = False
                        # if no viable point has been located
                        # after exhausting the line, break out
                        # of the location fixer
                        fixing_loc = False
            # break out of loop when either (a) pp2n points have
            # been located; or (b) pp2n points have been determined
            # to not be feasible
            # NOTE(review): this unconditional assignment means the
            # enclosing `while determining_pp2n` loop executes at most
            # one pass -- confirm this single-pass behavior is intended
            determining_pp2n = False
        # add pp2n points to list unless none were located
        if segm_pp2n_pts:
            pp2ns.extend([[idx, pp2n_pt] for pp2n_pt in segm_pp2n_pts])
    return pp2ns
def create_valid_unary(unary, nominal_buffer=0.):
    """Return a valid version of a unioned polygon geometry.

    Valid input is returned untouched. An invalid Polygon is repaired
    with a zero-width (or `nominal_buffer`) buffer; an invalid
    MultiPolygon has each invalid member buffered individually before
    being recomposed.

    Parameters
    ----------
    unary : {shapely.Polygon, shapely.MultiPolygon}
        may or may not consist of valid geometries
    nominal_buffer : {float, int}
        0. or small buffer to perform in order to revalidate
        polygon geometries

    Returns
    -------
    unary : {shapely.Polygon, shapely.MultiPolygon}
        single valid geometry or multiple valid geometries
    """
    # already valid -- nothing to repair
    if unary.is_valid:
        return unary
    try:
        # a MultiPolygon supports len(); a bare Polygon raises
        # TypeError ('Polygons have no length')
        len(unary)
    except TypeError:
        # single Polygon case: buffer repairs the geometry
        return unary.buffer(nominal_buffer)
    # MultiPolygon case: repair each member individually
    repaired = []
    for member in list(unary):
        if not member.is_valid:
            member = member.buffer(nominal_buffer)
        # if the buffer did not fix the geometry we have a problem
        if not member.is_valid:
            raise ValueError('Polygon still not valid after buffer.')
        repaired.append(member)
    # cast back as MultiPolygon
    return MultiPolygon(repaired)
def gen_offsets(loi_, offset_ , parallel_increment=None, for_ring=False):
    """low level function for generating parallel offset lines

    Parameters
    ----------
    loi_ : shapely.geometry.LineString
        network line segment in question
    offset_ : see pp2n(offset_)
    parallel_increment : see pp2n()
    for_ring : bool
        flag if segment is a LinearRing [True] or not [False].
        Default is False.

    Returns
    -------
    offset_lines : list
        two parallel lines offset by `offset_` meters
    """
    # standard 'non-ring' line case
    if not for_ring:
        offset_lines = _gen_offsets(loi_,
                                    offset_,
                                    parallel_increment)
    # linear ring case -- shrink the offset until neither
    # generated line is itself a ring
    else:
        fixing_ring_inset = for_ring
        while fixing_ring_inset:
            # generate parallel offset lines
            # NOTE(review): this re-trims the endpoints
            # (`coords[1:-1]`) on *every* pass, so repeated
            # iterations progressively shorten the line --
            # TODO confirm this cumulative truncation is intended
            loi_ = LineString(loi_.coords[1:-1])
            offset_lines = _gen_offsets(loi_,
                                        offset_,
                                        parallel_increment)
            still_ring = False
            # check to see if the inner offset
            # resulted in a LinearRing
            for ofl in offset_lines:
                # if it did decrease the offset
                # by `parallel_increment`
                if ofl.is_ring:
                    offset_ -= parallel_increment
                    still_ring = True
            # segment no longer a ring
            # break out of loop
            if not still_ring:
                fixing_ring_inset = False
    return offset_lines
def _gen_offsets(loi_, initial_offset, parallel_increment):
"""helper function for `gen_offsets()`
Parameters
----------
loi_ : see gen_offsets()
initial_offset : see pp2n(offset_)
parallel_increment : ee pp2n(parallel_increment)
Returns
-------
offset_lines_ : list
two parallel line offset by `initial_offset` meters or by
`initial_offset` - (`parallel_increment` * INCR) when
`initial_offset` results in an infeasible line; where INCR in
the number of increments
"""
offset_lines_ = []
sides = ['right', 'left']
for side in sides:
complete = False
offset = initial_offset
while not complete:
offset_loi = loi_.parallel_offset(offset, side=side)
try:
if len(offset_loi.coords[:]) > 1:
offset_lines_.append(offset_loi)
complete = True
else:
offset -= parallel_increment
except NotImplementedError:
if type(offset_loi) == MultiLineString:
offset -= parallel_increment
if offset <= 0:
complete = True
else:
raise TypeError(type(offset_loi), 'not supported')
return offset_lines_
def create_pp2n_df(pts, polys, segms, cols, poly_key, crs=None,
                   remove=None, drop=None, pp2n_col=None, geo_col=None,
                   sid_name=None, len_segm=None, len_tot=None,
                   pop_rat=None, mtfcc=None, restrict_col=None,
                   poly_pop=None):
    """mid level function -- instantiate a GeoDataFrame
    for the pp2n points

    Populated polygons that received no pp2n point are represented by
    a single centroid carrying the full polygon population and are
    labeled 'ORPHAN' in the segment-derived columns.

    Parameters
    ----------
    crs : dict
        coordinate reference system of the polygon dataframe
    pts : see pp2n()
    polys : see pp2n()
    segms : see pp2n()
    cols : see pp2n(columns)
    poly_key : see pp2n()
    poly_pop : see pp2n()
    remove : see pp2n(remove_segm)
    drop : see pp2n(drop_cols)
    pp2n_col : see pp2n()
    restrict_col : see pp2n()
    sid_name : see pp2n()
    geo_col : see pp2n()
    len_segm : see pp2n()
    len_tot : see pp2n()
    pop_rat : see pp2n()
    mtfcc : see pp2n()

    Returns
    -------
    df : geopandas.GeoDataFrame
        generated pp2n points and associated segment ids
    orphans : list
        keys of populated polygons given no pp2n points
    """
    # columns for dataframe generation
    cols = cols + [pp2n_col, len_segm, len_tot, pop_rat]
    # generate dataframe of pp2n points
    # (np.empty values are placeholders, overwritten below)
    empty_shell = np.empty((len(pts), len(cols)))
    df = gpd.GeoDataFrame(empty_shell, columns=cols, crs=crs,
                          geometry=[geom for (idx,geom) in pts])
    df = utils.set_crs(df, proj_init=crs)
    # set associated segment ids
    df[sid_name] = [idx for (idx,geom) in pts]
    # set associated segment lengths
    df[len_segm] = [segms.loc[idx, geo_col].length for idx in df[sid_name]]
    # record associated segment mtfcc
    df[restrict_col] = [segms.loc[idx, mtfcc] for idx in df[sid_name]]
    # perform a spatial join
    polys = utils.set_crs(polys, proj_init=crs)
    df = gpd.sjoin(df, polys)
    # drop irrelevant columns
    if drop:
        df.drop(drop, axis=1, inplace=True)
    # determine whether any populated polygons in the dataset
    # were not given pp2n points
    orphans = list(set(polys[poly_key]).symmetric_difference(df[poly_key]))
    # if there were, label them as orphans,
    # give them the polygon attributes,
    # and represent them a single centroid
    if orphans:
        empty_shell = np.empty((len(orphans), df.shape[1]))
        orphans_df = gpd.GeoDataFrame(empty_shell, columns=df.columns, crs=crs)
        orphans_df[poly_key] = orphans
        # orphan geometry: the polygon's centroid
        orphans_df[geo_col] = [polys.loc[(polys[poly_key] == o), geo_col]\
                               .squeeze().centroid for o in orphans]
        # orphan population: the polygon's full population
        orphans_df[pp2n_col] = [polys.loc[(polys[poly_key] == o), poly_pop]\
                                .squeeze() for o in orphans]
        # mark the segment-derived columns as orphaned
        label_orphan = [restrict_col, sid_name, len_segm, len_tot, pop_rat]
        for lo in label_orphan:
            orphans_df[lo] = ['ORPHAN'] * len(orphans)
        # copy the remaining polygon attributes onto each orphan record
        for o in orphans:
            for pc in polys.columns:
                if pc in [poly_key, geo_col]:
                    continue
                odf_key = (orphans_df[poly_key] == o)
                pdf_key = (polys[poly_key] == o)
                orphans_df.loc[odf_key, pc] = polys.loc[pdf_key, pc].squeeze()
        # NOTE(review): DataFrame.append was removed in pandas >= 2.0;
        # this will need pd.concat if the environment is upgraded
        df = df.append(orphans_df)
        df.reset_index(inplace=True, drop=True)
    df = utils.set_crs(df, proj_init=crs)
    return df, orphans
def div_pop(df, orphans, as_type=None, pp2n_col=None, poly_key=None,
            len_segm=None, poly_pop=None, len_tot=None, pop_rat=None,
            offset_=None, problems_writer=None):
    """ mid level function -- assign a ratio of the total population
    to each pp2n point within a census geography based on the ratio of
    the associated line segment length to the total length of line
    segments associated with the census geography

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        pp2n dataframe
    orphans : list
        GEOIDs for populated polygons that no suitable pp2n point
        could be found.
    as_type : {int, float}
        data type of the generated pp2n population. Default is None.
    pp2n_col : see pp2n()
    poly_key : see pp2n()
    poly_pop : see pp2n()
    len_segm : see pp2n()
    len_tot : see pp2n()
    pop_rat : see pp2n()
    offset_ : see pp2n()
    problems_writer : see pp2n()

    Returns
    -------
    df : geopandas.GeoDataFrame
        updated pp2n dataframe (also mutated in place)
    """
    # for each group of pp2n points within
    # a single census geography
    for idx in df[poly_key].unique():
        # orphaned geographies already carry their full
        # population -- skip proportionalization
        if idx in orphans:
            continue
        # subset the dataframe
        pre_subset = df[df[poly_key] == idx].copy()
        # and calculate the total length of segments
        # associated with the census geography
        total_length = pre_subset[len_segm].sum()
        # then calculate the ratio of length and
        # calculate the population from the ratio
        for ss_idx in pre_subset.index:
            # set total length associated with geometry
            df.loc[ss_idx, len_tot] = total_length
            # set population ratio of associated geometry
            df.loc[ss_idx, pop_rat] = pre_subset.loc[ss_idx, len_segm] \
                                      / df.loc[ss_idx, len_tot]
            # set population associated with pp2n point
            df.loc[ss_idx, pp2n_col] = as_type(df.loc[ss_idx, pop_rat] \
                                               * df.loc[ss_idx, poly_pop])
        # check and adjust population as needed
        df = check_pop(df, idx, poly_key, poly_pop, pp2n_col)
    if as_type is int:
        df[pp2n_col] = df[pp2n_col].astype(as_type)
    # run final confirmation check for population
    # NOTE(review): `idx` here is the stale final value from the loop
    # above -- confirm `_check_pop` does not depend on which key is
    # passed in
    _check_pop(df, idx, poly_key, poly_pop, pp2n_col,
               offset_=offset_, problems_writer=problems_writer)
    return df
def check_pop(df_, id_, poly_key, pop_col, pp2n_col,
              problems=None, round2=False):
    """low level function -- ensure the sum of the pp2n population
    is equal to the original population

    On the first pass any discrepancy is folded back into the first
    pp2n record of the geography; on the second (confirmation) pass
    discrepancies are recorded in `problems` instead.

    Parameters
    ----------
    df_ : geopandas.GeoDataFrame
        pp2n dataframe
    id_ : int
        census geography ID
    poly_key : str
        census geography ID column name
    pop_col : str
        census geography population column name
    pp2n_col : see pp2n()
    problems : dict
        cases where the sum of ratio population does
        not equal the original total
    round2 : bool
        flag set for [True] if second round confirmation.
        Default is [False].

    Returns
    -------
    df_ : geopandas.GeoDataFrame
        adjusted (or not) pp2n dataframe (first round only)
    problems : dict
        updated `problems` (second round only)
    """
    # all pp2n records belonging to this geography
    geography = df_[df_[poly_key] == id_]
    estimated = geography[pp2n_col].sum()
    # the first record absorbs any rounding discrepancy
    first_idx = geography.index[0]
    original = geography[pop_col][first_idx]
    if not np.isclose([estimated], [original]):
        if round2:
            # confirmation pass: record, do not adjust
            problems[id_] = {'pp2n_sum':estimated,
                             'orig_sum':original}
        else:
            # first pass: fold the surplus/deficit into one record
            surplus = estimated - original
            current = df_.loc[first_idx, pp2n_col]
            df_.loc[first_idx, pp2n_col] = current - surplus
    if round2:
        return problems
    return df_
def _check_pop(df_, id_, poly_key, pop_col, pp2n_col,
               problems=None, round2=True, offset_=None,
               problems_writer=None):
    """ low level function -- ensure the sum of the pp2n population
    is equal to the original population across *every* geography

    Parameters
    ----------
    df_ : see check_pop()
    id_ : see check_pop()
        retained for interface compatibility; every unique key in
        `df_[poly_key]` is checked regardless of this value.
    poly_key : see check_pop()
    pop_col : see check_pop()
    pp2n_col : see check_pop()
    problems : see check_pop()
    round2 : see check_pop()
    offset_ : see pp2n()
    problems_writer : see pp2n()
    """
    problems = {}
    for idx in df_[poly_key].unique():
        # BUG FIX: previously this called check_pop with the constant
        # `id_`, re-validating a single geography on every iteration
        # instead of checking each `idx`
        problems = check_pop(df_, idx, poly_key, pop_col, pp2n_col,
                             problems=problems, round2=round2)
    if problems:
        warnings.warn('\n\npp2n sum is not equal to original population sum.')
        warnings.warn(str(problems))
        # pickle `problems` dict for later inspection
        prob_name = '%s%s' % (pp2n_col, offset_)
        spgh.dump_pickled(problems, problems_writer, pickle_name=prob_name)
def va2n(area, alloc_dir, geogs, net_segms, net_nodes, remove_segm=None,
mtfcc=None, cols_in=None, geo_col=None, sid_name=None, inter=None,
xyid=None, voronoi_diagnostic=True, clip_by=None, bounds=None,
poly_key=None, poly_pop=None, va2n_polys=None, proj_init=None,
initial_rho=10, incremental_rho=10, offset_param=2., area_thresh=.8,
symdiff_buff=0.00001, halt_at=1000, va2n_id='va2n_id',
va2n_col='pop_va2n', geo_area='geo_area', rat_area='rat_area',
file_type='.shp'):
""" Top level function for vector area-to-network data conversion method
of allocating polygon weights to a network.
CITE MOIOKA?
Parameters
----------
area : str
study area within county. Default is None.
alloc_dir : str
path to `allocation` data. Default is None.
geogs : geopandas.GeoDataFrame
census geographies
net_segms : geopandas.GeoDataFrame
road network segments
net_nodes : geopandas.GeoDataFrame
road network nodes (articulation points)
remove_segm : list
no population on roads that should never have population.
mtfcc : str
MAF/TIGER Feature Class code. Default is None.
cols_in : list
columns to keep after spatial join / dissolve. Default is None.
geo_col : str
geometry column name. Default is None.
sid_name : str
segment column name. Default is None.
inter : str
file path to intermediary data. Default is None.
xyid : str
combined x-coord + y-coords string ID. Default is None.
voronoi_diagnostic: bool
print iteration diagnostics. Default is True.
clip_by : shapely.geometry.[Multi]Polygon
final boundary for trimming voronoi cells. Default is None.
bounds : shapely.geometry.Polygon
outer boundary to trim reaching Voronoi cells. Default is None.
poly_key : str
polygon dataframe key column name. Default is None.
poly_pop : str
polygon dataframe population column name. Default is None.
va2n_polys : str
derivation from census geography file name. Default is None.
proj_init : int
intial coordinate reference system. default is None.
initial_rho : int
intial count of points to generate along line segments.
Default is 10.
incremental_rho : int
increase `initial_rho` by this number for each failed iteration
of LVD generation, where a failed iteration is defined as an
unequal number of line segment and LVD cells. Default is 10.
area_thresh : {int, float}
ratio of the calculated symmetric differnce areas of the LVD
polygon cells to the area of the segments envelope. The
`area_ratio` must be greater than `area_thresh` for the
algorithm to terminate successfully. Default is .95.
symdiff_buff : {int, float}
value to buffer symmetrical difference overlay. Default is .5.
halt_at : int
break iteration. Default is 1000 (rho).
va2n_id : str
va2n ID column name. Default is 'va2n_id'.
va2n_col : str
va2n population column name. Default is 'va2n_col'.
portion of population.
geo_area : str
label for column of area from census geographies subset.
Default is 'geo_area'.
rat_area : str
label for column of area ratio from census geographies subset.
Default is 'rat_area'.
file_type : str
file extension. Default is '.shp'.
Returns
-------
va2n_points_df : geopandas.GeoDataFrame
one point at the mid point of each line segment that has
a population associated with it.
"""
# -- Line Voronoi Diagram
lvd_file = '%slvd_%s_%s%s' % (alloc_dir, initial_rho, area, file_type)
# if the file already exists skip
#lvd_file_exists = os.path.exists(lvd_file)
lvd_file_exists = False
if not lvd_file_exists:
print('\t\t\tStart LVD -----')
start_lvd = time.time()
# create line voronoi diagram
lvd = faux_lvd(area, net_segms, net_nodes, remove_segm=remove_segm,
mtfcc=mtfcc, bounds=bounds, clip_by=clip_by,
initial_rho=initial_rho, offset_param=offset_param,
incremental_rho=incremental_rho, id_col=sid_name,
cols_in=cols_in, symdiff_buff=symdiff_buff,
diagnostic=voronoi_diagnostic, area_thresh=area_thresh,
geo_col=geo_col, inter=inter, proj_init=proj_init,
file_type=file_type)
end_lvd = round((time.time()-start_lvd)/60., 10)
print('\t\t\tEnd LVD ----- %s' % end_lvd)
lvd.to_file(lvd_file)
else:
lvd = gpd.read_file(lvd_file)
lvd = utils.set_crs(lvd, proj_init=proj_init)
# -- va2n Polygons
va2n_poly_file = '%s%s%s' % (inter, va2n_polys, file_type)
# if the file already exists skip
#va2n_poly_file_exists = os.path.exists(va2n_poly_file)
va2n_poly_file_exists = False
if not va2n_poly_file_exists:
# overlay -- Union of census geographies and LVD cells
va2n_polys_df = gpd.overlay(geogs, lvd, how='union')
va2n_polys_df = utils.set_crs(va2n_polys_df, proj_init=proj_init)
# drop NaNs
va2n_polys_df.dropna(inplace=True)
# Convert IDs back to integer
va2n_polys_df[poly_key] = va2n_polys_df[poly_key].astype(int)
va2n_polys_df[sid_name] = va2n_polys_df[sid_name].astype(int)
# create new empty column to house new va2n area
va2n_polys_df[geo_area] = np.zeros
# fetch area for each original census geography
for unique_geoid in va2n_polys_df[poly_key].unique():
subset = va2n_polys_df[va2n_polys_df[poly_key] == unique_geoid]
subset_area = subset.area.sum()
va2n_polys_df.loc[subset.index, geo_area] = subset_area
# calculate va2n_polys / original_poly area ratio
va2n_polys_df[rat_area] = va2n_polys_df.area\
/ va2n_polys_df[geo_area]
# set va2n population proportion based on area ratio
va2n_polys_df[va2n_col] = va2n_polys_df[rat_area]\
* va2n_polys_df[poly_pop]
# generate high-precision ID (polygon, segment, index)
va2n_polys_df[va2n_id] = np.vectorize(high_precision_id)\
(va2n_polys_df[poly_key],
va2n_polys_df[sid_name],
va2n_polys_df.index)
va2n_polys_df.to_file(va2n_poly_file)
else:
va2n_polys_df = gpd.read_file(va2n_poly_file)
va2n_polys_df = utils.set_crs(va2n_polys_df, proj_init=proj_init)
del lvd
# -- va2n Points
va2n_point_file = '%sva2n_%s_%s%s' % (alloc_dir, initial_rho,
area, file_type)
# if the file already exists skip
#va2n_point_file_exists = os.path.exists(va2n_point_file)
va2n_point_file_exists = False
if not va2n_point_file_exists:
# va2n point dataframe
va2n_points_df = gpd.GeoDataFrame(crs=net_segms.crs)
# segments unvisited set
va2n_poly_seg_ids = set(va2n_polys_df[sid_name])
while va2n_poly_seg_ids:
# iterate over network segments
for sid in net_segms[sid_name]:
# evaluate only if the segment is associated
# with an `va2n polygon`
if sid in va2n_poly_seg_ids:
# extract population of polygons
# associated with the segment
pop_bool = (va2n_polys_df[sid_name] == sid)
_pop_va2n_ = va2n_polys_df.loc[pop_bool, va2n_col]
_pop_va2n_ = _pop_va2n_.squeeze()
# convert datatype and sum if more than one value
if type(_pop_va2n_) == float:
pass
elif type(_pop_va2n_) == str:
_pop_va2n_ = float(_pop_va2n_)
else:
_pop_va2n_ = _pop_va2n_.astype(float).sum()
# extract IDs of polygons
# associated with the segment
ids_bool = (va2n_polys_df[sid_name] == sid)
_va2n_ids = list(va2n_polys_df.loc[ids_bool, va2n_id])
# create new unique id based on the `okabe ids`
if len(_va2n_ids) > 1:
_va2n_ids = '-'.join(_va2n_ids)
else:
_va2n_ids = _va2n_ids[0]
# dataframe index of segment
seg_df_idx = (net_segms[sid_name] == sid)
# segment geometry
segment = net_segms.loc[seg_df_idx, geo_col].squeeze()
# segment midpoint
va2n_point = segment.interpolate(.5, normalized=True)
# set up the record and column structure
record = [[sid, va2n_point, _va2n_ids, _pop_va2n_]]
columns = [sid_name, geo_col, va2n_id, va2n_col]
# prepare a temporary dataframe for the points
temp_gdf = gpd.GeoDataFrame(record, columns=columns)
# append temporary dataframe to full dataframe
va2n_points_df = va2n_points_df.append(temp_gdf)
# mark the segments as visited
va2n_poly_seg_ids.remove(sid)
va2n_points_df.reset_index(inplace=True, drop=True)
va2n_points_df = utils.set_crs(va2n_points_df, proj_init=proj_init)
# set xyID
node2xyid = utils.generate_xyid(df=va2n_points_df, geom_type='node',
geo_col=geo_col)
va2n_points_df = utils.fill_frame(va2n_points_df, idx='index',
col=xyid, data=node2xyid)