# Populations

This notebook looks at the number of children in each group on each day. Groups are defined by age bin and placement type.

In [1]:
import pandas as pd
import csdmpy.timeseries as ts
import csdmpy.populations as pop
import csdmpy.constants as C

In [2]:
# Example episodes whose start (DECOM) and end (DEC) dates fall before, during
# or after March 2021; DESC labels each case.
FOSTER = C.PlacementCategory.FOSTER
RESIDENTIAL = C.PlacementCategory.RESIDENTIAL
day = pd.to_datetime

episode_rows = [
    [day('2021-02-15'), day('2021-02-28'), 'Starts before, ends before', '10 to 16', FOSTER],
    [day('2021-02-15'), day('2021-03-15'), 'Stars before, ends during', '10 to 16', RESIDENTIAL],
    [day('2021-02-15'), day('2021-04-15'), 'Starts before, ends after', '10 to 16', RESIDENTIAL],
    # Open-ended episode: no DEC, and no age_bin / placement_type values given,
    # so those cells come out empty in the frame.
    [day('2021-02-15'), None, 'Starts before, no end'],
    [day('2021-03-15'), day('2021-03-25'), 'Starts during, ends during', '10 to 16', FOSTER],
    [day('2021-03-15'), day('2021-04-25'), 'Starts during, ends after', '10 to 16', FOSTER],
    [day('2021-04-15'), day('2021-05-25'), 'Starts after, ends after', '10 to 16', FOSTER],
]
df = pd.DataFrame(
    episode_rows,
    columns=['DECOM', 'DEC', 'DESC', 'age_bin', 'placement_type'],
)
df

Unnamed: 0,DECOM,DEC,DESC,age_bin,placement_type
0,2021-02-15,2021-02-28,"Starts before, ends before",10 to 16,PlacementCategory.FOSTER
1,2021-02-15,2021-03-15,"Stars before, ends during",10 to 16,PlacementCategory.RESIDENTIAL
2,2021-02-15,2021-04-15,"Starts before, ends after",10 to 16,PlacementCategory.RESIDENTIAL
3,2021-02-15,NaT,"Starts before, no end",,
4,2021-03-15,2021-03-25,"Starts during, ends during",10 to 16,PlacementCategory.FOSTER
5,2021-03-15,2021-04-25,"Starts during, ends after",10 to 16,PlacementCategory.FOSTER
6,2021-04-15,2021-05-25,"Starts after, ends after",10 to 16,PlacementCategory.FOSTER


# Legacy approach

There is a legacy approach which involves iterating over each of the intervals and calculating the numbers for that interval. The advantage of this method is that it makes it very clear what is going on, and the intervals can be customised. The downside is that it is very slow.

In [3]:
# Daily steps across March 2021.
window_start = pd.to_datetime('2021-03-01')
window_end = pd.to_datetime('2021-03-31')
daily_step = ts.StepSize(1, C.IntervalUnit.DAY)
pop.make_populations_ts(df, window_start, window_end, step_size=daily_step)

age_bin,-1 to 1,-1 to 1,1 to 5,1 to 5,1 to 5,5 to 10,5 to 10,5 to 10,10 to 16,10 to 16,10 to 16,16 to 18,16 to 18,16 to 18,16 to 18
placement_type,PlacementCategory.FOSTER,PlacementCategory.OTHER,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL,PlacementCategory.OTHER,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL,PlacementCategory.OTHER,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL,PlacementCategory.OTHER,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL,PlacementCategory.SUPPORTED,PlacementCategory.OTHER
2021-03-01,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-02,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-03,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-04,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-05,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-06,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-07,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-08,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-09,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0
2021-03-10,0,0,0,0,0,0,0,0,0.0,2.0,0,0,0,0,0


# New approach: Using transitions and interpolating populations

The new approach instead just calculates the transitions based on the start and end of episodes, and then simply sums these to calculate absolute numbers for each group.

In [4]:
# Same March 2021 window as the legacy call, for a side-by-side comparison.
window_start = pd.to_datetime('2021-03-01')
window_end = pd.to_datetime('2021-03-31')
pop.get_daily_pops_new_way(df, window_start, window_end)

age_bin,10 to 16,10 to 16
placement_type,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL
date,Unnamed: 1_level_2,Unnamed: 2_level_2
2021-03-01,0.0,2.0
2021-03-02,0.0,2.0
2021-03-03,0.0,2.0
2021-03-04,0.0,2.0
2021-03-05,0.0,2.0
2021-03-06,0.0,2.0
2021-03-07,0.0,2.0
2021-03-08,0.0,2.0
2021-03-09,0.0,2.0
2021-03-10,0.0,2.0


We can step through this in more detail. First we extract all the start and end dates.

In [5]:
# Count episode starts (DECOM) per date and group; the result is named "nof_decoms".
beginnings = pop._group_and_count_dates(
    df,
    "DECOM",
    "nof_decoms",
)
# Count episode ends (DEC) the same way; the result is named "nof_decs".
endings = pop._group_and_count_dates(
    df,
    "DEC",
    "nof_decs",
)
beginnings

date        placement_type                 age_bin 
2021-02-15  PlacementCategory.FOSTER       10 to 16    1
            PlacementCategory.RESIDENTIAL  10 to 16    2
2021-03-15  PlacementCategory.FOSTER       10 to 16    2
2021-04-15  PlacementCategory.FOSTER       10 to 16    1
Name: nof_decoms, dtype: int64

We can then combine these to show the transitions into and out of each group on each date.

In [6]:
# Outer-join on the shared index so that dates with only starts or only ends
# are both kept.
merged_counts = pd.merge(
    beginnings,
    endings,
    how="outer",
    left_index=True,
    right_index=True,
)
# A missing count on either side means nothing happened that day: treat as 0.
pops = merged_counts.fillna(0).sort_values("date")
pops

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,nof_decoms,nof_decs
date,placement_type,age_bin,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-02-15,PlacementCategory.FOSTER,10 to 16,1.0,0.0
2021-02-15,PlacementCategory.RESIDENTIAL,10 to 16,2.0,0.0
2021-02-28,PlacementCategory.FOSTER,10 to 16,0.0,1.0
2021-03-15,PlacementCategory.FOSTER,10 to 16,2.0,0.0
2021-03-15,PlacementCategory.RESIDENTIAL,10 to 16,0.0,1.0
2021-03-25,PlacementCategory.FOSTER,10 to 16,0.0,1.0
2021-04-15,PlacementCategory.FOSTER,10 to 16,1.0,0.0
2021-04-15,PlacementCategory.RESIDENTIAL,10 to 16,0.0,1.0
2021-04-25,PlacementCategory.FOSTER,10 to 16,0.0,1.0
2021-05-25,PlacementCategory.FOSTER,10 to 16,0.0,1.0


This can then be turned into net transition numbers: the number of starts minus the number of ends for each date and group.

In [7]:
# Net change per (date, group): episodes starting minus episodes ending.
transitions = pops["nof_decoms"].sub(pops["nof_decs"])
transitions

date        placement_type                 age_bin 
2021-02-15  PlacementCategory.FOSTER       10 to 16    1.0
            PlacementCategory.RESIDENTIAL  10 to 16    2.0
2021-02-28  PlacementCategory.FOSTER       10 to 16   -1.0
2021-03-15  PlacementCategory.FOSTER       10 to 16    2.0
            PlacementCategory.RESIDENTIAL  10 to 16   -1.0
2021-03-25  PlacementCategory.FOSTER       10 to 16   -1.0
2021-04-15  PlacementCategory.FOSTER       10 to 16    1.0
            PlacementCategory.RESIDENTIAL  10 to 16   -1.0
2021-04-25  PlacementCategory.FOSTER       10 to 16   -1.0
2021-05-25  PlacementCategory.FOSTER       10 to 16   -1.0
dtype: float64

We then cumulatively sum these within each group to calculate the total numbers on each transition date.

In [8]:
# Running total of net transitions within each (placement_type, age_bin) group.
running_totals = transitions.groupby(["placement_type", "age_bin"]).cumsum()
# Pivot the groups into columns, leaving one row per transition date; dates on
# which a group had no transition show up as NaN.
total_counts = running_totals.unstack(["age_bin", "placement_type"])
total_counts

age_bin,10 to 16,10 to 16
placement_type,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL
date,Unnamed: 1_level_2,Unnamed: 2_level_2
2021-02-15,1.0,2.0
2021-02-28,0.0,
2021-03-15,2.0,1.0
2021-03-25,1.0,
2021-04-15,2.0,0.0
2021-04-25,1.0,
2021-05-25,0.0,


Now resample for each date, and forward fill the gaps

In [9]:
# Put the transition dates onto a daily grid, then carry each group's last known
# count forward over the days with no transition.
# `.ffill()` is used instead of `fillna(method="ffill")`, which is deprecated in
# recent pandas releases and emits a FutureWarning.
daily_counts = total_counts.resample("D").first().ffill()
daily_counts

age_bin,10 to 16,10 to 16
placement_type,PlacementCategory.FOSTER,PlacementCategory.RESIDENTIAL
date,Unnamed: 1_level_2,Unnamed: 2_level_2
2021-02-15,1.0,2.0
2021-02-16,1.0,2.0
2021-02-17,1.0,2.0
2021-02-18,1.0,2.0
2021-02-19,1.0,2.0
...,...,...
2021-05-21,1.0,0.0
2021-05-22,1.0,0.0
2021-05-23,1.0,0.0
2021-05-24,1.0,0.0


This can now be truncated to give the expected range.