Skip to content

Commit

Permalink
assign person number in cdapPersonArray preference order
Browse files Browse the repository at this point in the history
  • Loading branch information
toliwaga committed Sep 26, 2016
1 parent 7fc1fe9 commit ce93c5c
Showing 1 changed file with 49 additions and 11 deletions.
60 changes: 49 additions & 11 deletions activitysim/cdap/xdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,74 @@ def _run_cdap(
WORKER_PTYPES = [1, 2]
CHILD_PTYPES = [6, 7, 8]

CDAP_WORKER = 1
CDAP_CHILD = 2
CDAP_BACKFILL = 3
CDAP_UNASSIGNED = 9
people['cdap_person'] = CDAP_UNASSIGNED
RANK_WORKER = 1
RANK_CHILD = 2
RANK_BACKFILL = 3
RANK_UNASSIGNED = 9
people['cdap_rank'] = RANK_UNASSIGNED

# choose up to 2 workers, preferring full over part, older over younger
workers = people.loc[people[p_type_col].isin(WORKER_PTYPES),
['household_id', 'ptype', 'age']]\
.sort_values(by=['household_id', 'ptype', 'age'], ascending=[True, True, False])\
.groupby(hh_id_col).head(2)
# tag the selected workers
people.loc[workers.index, 'cdap_person'] = CDAP_WORKER
people.loc[workers.index, 'cdap_rank'] = RANK_WORKER
del workers

# choose up to 3, preferring youngest
children = people.loc[people[p_type_col].isin(CHILD_PTYPES),
['household_id', 'ptype', 'age']]\
.sort_values(by=['household_id', 'age'], ascending=[True, True])\
.groupby(hh_id_col).head(3)
# tag the selected children
people.loc[children.index, 'cdap_person'] = CDAP_CHILD
people.loc[children.index, 'cdap_rank'] = RANK_CHILD
del children

# choose up to 5, preferring anyone already chosen
others = people[['household_id', 'cdap_person']]\
.sort_values(by=['household_id', 'cdap_person'], ascending=[True, True])\
others = people[['household_id', 'cdap_rank']]\
.sort_values(by=['household_id', 'cdap_rank'], ascending=[True, True])\
.groupby(hh_id_col).head(5)
# tag the backfilled persons
people.loc[others[others.cdap_person == CDAP_UNASSIGNED].index, 'cdap_person'] = CDAP_BACKFILL
people.loc[others[others.cdap_rank == RANK_UNASSIGNED].index, 'cdap_rank'] \
= RANK_BACKFILL
del others

# FIXME - possible workaround if below too big/slow
# stackoverflow.com/questions/26720916/faster-way-to-rank-rows-in-subgroups-in-pandas-dataframe
# Working with a big DataFrame (13 million lines), the method rank with groupby
# maxed out my 8GB of RAM and it took a really long time. I found a workaround
# less greedy in memory, that I put here just in case:
# df.sort_values('value')
# tmp = df.groupby('group').size()
# rank = tmp.map(range)
# rank =[item for sublist in rank for item in sublist]
# df['rank'] = rank

# FIXME - redundant: choose between a column in people or the cdapPersonArray df
# assign person number in cdapPersonArray preference order
# i.e. convert cdap_rank from category to index in order of category rank within household
cdapPersonArray = people.loc[people[p_type_col] < RANK_UNASSIGNED, ['household_id', 'ptype']]
cdapPersonArray['cdap_pnum'] = people\
.sort_values(by=['household_id', 'cdap_rank', 'age'], ascending=[True, True, True])\
.groupby(hh_id_col)['household_id']\
.rank(method='first', na_option='top')\
.astype(int)

tracing.trace_df(cdapPersonArray,
'%s.cdapPersonArray' % trace_label,
transpose=False,
slicer='NONE')

# FIXME - redundant: choose between a column in people or the cdapPersonArray df
# assign person number in cdapPersonArray preference order
people['cdap_rank'] = people\
.sort_values(by=['household_id', 'cdap_rank', 'age'], ascending=[True, True, True])\
.groupby(hh_id_col)['household_id']\
.rank(method='first', na_option='top')\
.astype(int)

tracing.trace_df(people[['household_id', 'PERSONS', 'ptype', 'age', 'cdap_person']],
tracing.trace_df(people[['household_id', 'PERSONS', 'ptype', 'age', 'cdap_rank']],
'%s.people' % trace_label,
transpose=False,
slicer='NONE')
Expand Down

0 comments on commit ce93c5c

Please sign in to comment.