In [47]:
import os
import pandas as pd
import random
from copy import deepcopy
from collections import defaultdict
import geopandas as gp
from matplotlib import pyplot as plt
from pam import parse

path_to_repo = '/Users/.../PythonProjects/pam'

## Load Data
Here we load simple travel diary data of London commuters. This is a very simple 0.1% sample of data about work and education commutes from the 2011 census. Because we're sharing this date - we've aggregated locations to borough level and randomized personal attributes - so don't get too excited about the results.

The data is available in `pam/example_data`.

In [2]:
trips = pd.read_csv(os.path.join(path_to_repo, 'example_data', 'example_travel_diaries.csv'))
attributes = pd.read_csv(os.path.join(path_to_repo, 'example_data' , 'example_attributes.csv'))
attributes.set_index('pid', inplace=True)

In [3]:
attributes.head()

Unnamed: 0_level_0,gender,job,occ,inc
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
census_0,female,work,white,low
census_1,female,work,white,low
census_2,male,work,blue,high
census_3,male,work,blue,low
census_4,male,work,blue,medium


In [4]:
trips.head()

Unnamed: 0,uid,pid,hid,seq,hzone,ozone,dzone,purp,mode,tst,tet,freq
0,0,census_0,census_0,0,Harrow,Harrow,Camden,work,pt,444,473,1000
1,1,census_0,census_0,1,Harrow,Camden,Harrow,work,pt,890,919,1000
2,2,census_1,census_1,0,Greenwich,Greenwich,Tower Hamlets,work,pt,507,528,1000
3,3,census_1,census_1,1,Greenwich,Tower Hamlets,Greenwich,work,pt,1065,1086,1000
4,4,census_2,census_2,0,Croydon,Croydon,Croydon,work,pt,422,425,1000


In [5]:
trips['purp'].unique(), trips['mode'].unique()

(array(['work', 'education'], dtype=object),
 array(['pt', 'car'], dtype=object))

## Imagine we had better data...

Conjure up additional data to aid demonstration.

We add:
- `age` to person attributes
- activities: 
    - `leisure`
    - `health`
    - `shopping`
    - `escort`

and we put people in shared households based on shared hzone (originally it's one person per household)

**Randomly.**

In [6]:
import numpy as np

# add age column to attributes
attributes['age'] = [int(a) for a in np.random.normal(40.5, 10, len(attributes))]

In [7]:
# add some extra activities to trips
zones = list(set(trips['hzone'].unique()) | set(trips['ozone'].unique()) | set(trips['dzone'].unique()))
purp = ['leisure', 'health', 'shopping', 'escort']
mode = ['pt', 'car', 'walk', 'bike']

def enrich_activities(group):
    new_group = pd.DataFrame(columns=group.columns)
    for idx in group.index:
        trip = group.loc[idx, :]
        try:
            next_start = group.loc[idx+1, :]['tst']
        except KeyError:
            next_start = 1439
        new_group = new_group.append(trip)
        # append a random activity
        activity_loc = random.choice(zones)
        activity_purp = random.choice(purp)
        activity_mode = random.choice(mode)

        act_times = [random.randint(trip['tet']+1, next_start-1) for i in range(4)]
        act_times.sort()

        new_group = new_group.append(pd.DataFrame(
            {'uid': ['{}_act_to'.format(trip['uid']), '{}_act_from'.format(trip['uid'])], 
             'pid': [trip['pid'], trip['pid']], 'hid': [trip['hid'], trip['hid']], 
             'seq': [0, 0], 'hzone': [trip['hzone'], trip['hzone']], 'ozone': [trip['dzone'], activity_loc], 
             'dzone': [activity_loc, trip['ozone']], 'purp': [activity_purp, activity_purp], 
             'mode': [activity_mode, activity_mode], 'tst': [act_times[0], act_times[2]], 
             'tet': [act_times[1], act_times[3]], 'freq': [trip['freq'], trip['freq']]}))
    new_group = new_group.reset_index(drop=True)
    new_group = new_group.drop(['seq'], axis=1)
    new_group = new_group.rename_axis('seq').reset_index()
    return new_group

new_trips = trips.groupby('pid').apply(enrich_activities).reset_index(drop=True)

In [8]:
# generate some households (as opposed one person to a household)
# bunch up people who share the same hzone
import uuid

def bunch_up_people(group):
    group['pid'].unique()
    households = []
    i = 1
    household= []
    h_size = random.randint(1,5)
    for p in group['pid'].unique():
        household.append(p)
        i += 1

        if i>h_size:
            households.append(household)
            h_size = random.randint(1,5)
            household= []
            i = 1

    return dict(zip([str(uuid.uuid4()) for i in range(len(households))], households))

In [9]:
households_map = {}
households_per_zone = new_trips.groupby('hzone').apply(bunch_up_people)
for idx in households_per_zone.index:
    households_map = {**households_map, **households_per_zone[idx]}

In [10]:
# ze old switcheroo person to household id
new_keys = []
new_values = []
for key, value in households_map.items():
    for item in value:
        new_keys.append(item)
        new_values.append(key.lower())
person_to_household_map = dict(zip(new_keys, new_values))

In [11]:
person_to_household_map

{'census_1016': '26d16d0a-3f33-4c68-81e1-95921f035f49',
 'census_1043': '26d16d0a-3f33-4c68-81e1-95921f035f49',
 'census_1197': '26d16d0a-3f33-4c68-81e1-95921f035f49',
 'census_1247': '26d16d0a-3f33-4c68-81e1-95921f035f49',
 'census_1272': '6578beb2-f8aa-4d5a-bddb-8ea6cf734e65',
 'census_128': '6578beb2-f8aa-4d5a-bddb-8ea6cf734e65',
 'census_1281': '6578beb2-f8aa-4d5a-bddb-8ea6cf734e65',
 'census_1287': '6578beb2-f8aa-4d5a-bddb-8ea6cf734e65',
 'census_1375': 'eac41418-2041-457d-8d01-2d5a6a3d5e8b',
 'census_1418': '835a8d44-e04e-47cf-9ca0-e0dd2136d99a',
 'census_1420': '835a8d44-e04e-47cf-9ca0-e0dd2136d99a',
 'census_1536': '48b4355c-0421-49ee-9e15-877c9035e7b0',
 'census_1665': '48b4355c-0421-49ee-9e15-877c9035e7b0',
 'census_1670': '48b4355c-0421-49ee-9e15-877c9035e7b0',
 'census_1690': '48b4355c-0421-49ee-9e15-877c9035e7b0',
 'census_1702': '48b4355c-0421-49ee-9e15-877c9035e7b0',
 'census_1707': '459103b9-9510-4c4f-b60a-b28cb8f7af7e',
 'census_1774': '459103b9-9510-4c4f-b60a-b28cb8f7

In [12]:
new_trips['hid'] = new_trips['pid'].map(person_to_household_map)

## Input data after changes 

In [13]:
attributes.head()

Unnamed: 0_level_0,gender,job,occ,inc,age
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
census_0,female,work,white,low,43
census_1,female,work,white,low,44
census_2,male,work,blue,high,41
census_3,male,work,blue,low,36
census_4,male,work,blue,medium,32


In [14]:
new_trips.head(10)

Unnamed: 0,seq,uid,pid,hid,hzone,ozone,dzone,purp,mode,tst,tet,freq
0,0,0,census_0,703da4e3-39ff-442e-aa69-07675d0dbd4f,Harrow,Harrow,Camden,work,pt,444,473,1000
1,1,0_act_to,census_0,703da4e3-39ff-442e-aa69-07675d0dbd4f,Harrow,Camden,Lewisham,leisure,pt,588,635,1000
2,2,0_act_from,census_0,703da4e3-39ff-442e-aa69-07675d0dbd4f,Harrow,Lewisham,Harrow,leisure,pt,703,768,1000
3,3,1,census_0,703da4e3-39ff-442e-aa69-07675d0dbd4f,Harrow,Camden,Harrow,work,pt,890,919,1000
4,4,1_act_to,census_0,703da4e3-39ff-442e-aa69-07675d0dbd4f,Harrow,Harrow,Redbridge,shopping,walk,1068,1118,1000
5,5,1_act_from,census_0,703da4e3-39ff-442e-aa69-07675d0dbd4f,Harrow,Redbridge,Camden,shopping,walk,1225,1382,1000
6,0,2,census_1,70b5b031-8e11-46d6-ba9d-45c5e18ddec8,Greenwich,Greenwich,Tower Hamlets,work,pt,507,528,1000
7,1,2_act_to,census_1,70b5b031-8e11-46d6-ba9d-45c5e18ddec8,Greenwich,Tower Hamlets,Camden,escort,car,556,568,1000
8,2,2_act_from,census_1,70b5b031-8e11-46d6-ba9d-45c5e18ddec8,Greenwich,Camden,Greenwich,escort,car,699,910,1000
9,3,3,census_1,70b5b031-8e11-46d6-ba9d-45c5e18ddec8,Greenwich,Tower Hamlets,Greenwich,work,pt,1065,1086,1000


In [15]:
new_trips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17556 entries, 0 to 17555
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   seq     17556 non-null  int64 
 1   uid     17556 non-null  object
 2   pid     17556 non-null  object
 3   hid     17238 non-null  object
 4   hzone   17556 non-null  object
 5   ozone   17556 non-null  object
 6   dzone   17556 non-null  object
 7   purp    17556 non-null  object
 8   mode    17556 non-null  object
 9   tst     17556 non-null  object
 10  tet     17556 non-null  object
 11  freq    17556 non-null  object
dtypes: int64(1), object(11)
memory usage: 1.6+ MB


In [16]:
new_trips['purp'].unique(), new_trips['mode'].unique()

(array(['work', 'leisure', 'shopping', 'escort', 'health', 'education'],
       dtype=object),
 array(['pt', 'walk', 'car', 'bike'], dtype=object))

In [17]:
def print_simple_stats(population):
    """
    Print some simple population statistics.
    """
    time_at_home = 0
    travel_time = 0 
    low_income_central_trips = 0
    high_income_central_trips = 0
    
    for hid, hh in population.households.items():
        for pid, person in hh.people.items():
            freq = person.freq
            
            for p in person.plan:
                if p.act == 'travel':
                    duration = p.duration.seconds * freq / 3600
                    travel_time += duration
                    
                    if p.end_location.area == "Westminster,City of London":
                        if person.attributes['inc'] == "low":
                            low_income_central_trips += freq
                            
                        elif person.attributes['inc'] == "high":
                            high_income_central_trips += freq
                    
                else:  # activity
                    if p.act == 'home':
                        duration = p.duration.seconds * freq / 3600
                        time_at_home += duration
                        
    print(f"Population total time at home: {time_at_home/1000000:.2f} million hours")
    print(f"Population total travel time: {travel_time/1000000:.2f} million hours")
    print(f"Low income trips to Central London: {low_income_central_trips} trips")
    print(f"High income trips to Central London: {high_income_central_trips} trips")

## Create the population

In [18]:
population = parse.load_travel_diary(new_trips, attributes)

In [19]:
rando_hh_id = list(households_map.keys())[0]
rando_pid_in_hh = list(population.households[rando_hh_id].people)[0]

In [20]:
population.households[rando_hh_id].people

{'census_1016': <pam.core.Person at 0x119028fd0>,
 'census_1043': <pam.core.Person at 0x119028750>,
 'census_1197': <pam.core.Person at 0x11902d850>,
 'census_1247': <pam.core.Person at 0x1190359d0>}

In [21]:
population.households[rando_hh_id].people[rando_pid_in_hh].plan.print()

0:	Activity(0 act:home, location:Barking and Dagenham, time:00:00:00 --> 07:21:00, duration:7:21:00)
1:	Leg(0 mode:car, area:Barking and Dagenham --> Barking and Dagenham, time:07:21:00 --> 07:30:00, duration:0:09:00)
2:	Activity(1 act:home, location:Barking and Dagenham, time:07:30:00 --> 09:52:00, duration:2:22:00)
3:	Leg(1 mode:walk, area:Barking and Dagenham --> Wandsworth, time:09:52:00 --> 14:42:00, duration:4:50:00)
4:	Activity(2 act:health, location:Wandsworth, time:14:42:00 --> 15:28:00, duration:0:46:00)
5:	Leg(2 mode:walk, area:Wandsworth --> Barking and Dagenham, time:15:28:00 --> 15:59:00, duration:0:31:00)
6:	Activity(3 act:home, location:Barking and Dagenham, time:15:59:00 --> 16:02:00, duration:0:03:00)
7:	Leg(3 mode:car, area:Barking and Dagenham --> Barking and Dagenham, time:16:02:00 --> 16:11:00, duration:0:09:00)
8:	Activity(4 act:home, location:Barking and Dagenham, time:16:11:00 --> 18:13:00, duration:2:02:00)
9:	Leg(4 mode:car, area:Barking and Dagenham --> Bren

In [22]:
population.households[rando_hh_id].people[rando_pid_in_hh].attributes

{'gender': 'other', 'job': 'work', 'occ': 'white', 'inc': 'low', 'age': 36}

In [23]:
print_simple_stats(population)

Population total time at home: 34.57 million hours
Population total travel time: 18.90 million hours
Low income trips to Central London: 495000 trips
High income trips to Central London: 545000 trips


# PAM Simple Policies

Based on [link](https://docs.google.com/spreadsheets/d/1FQMa7dLe2cv1NEZnbu5cZo3v07tKXINwvOaLQYoEp-M/edit#gid=0)

In [24]:
from pam import modify

def apply_policies(population, policies: list):

    new_population = deepcopy(population) 
    for hid, household in new_population.households.items():
        for policy in policies:
            policy.apply_to(household)
    return new_population

## Household Quarantine

Probabilistically apply quarantine to a household (remove all activities - stay at home) 

### Household-based

If you have a probability of a household having to be quarantined

In [25]:
policy_household_quarantine_per_household = modify.HouseholdQuarantined(
    probability=0.01, 
    person_based=False)

In [26]:
print_simple_stats(apply_policies(population, [policy_household_quarantine_per_household]))

Population total time at home: 34.98 million hours
Population total travel time: 18.67 million hours
Low income trips to Central London: 491000 trips
High income trips to Central London: 536000 trips


### Person-based

If you have a probability of any one person living in the household having to be quarantined.

The probability of the household being quarantined is then $1 - (1-P)^n$, where $P$ is the probability any one person being quarantined and $n$ is the number of people in the household; $(1-P)^n$ is the probability of no one having to be quarantined.

In [27]:
policy_household_quarantine_per_person = modify.HouseholdQuarantined(
    probability=0.01, 
    person_based=True)

In [28]:
print_simple_stats(apply_policies(population, [policy_household_quarantine_per_person]))

Population total time at home: 35.37 million hours
Population total travel time: 18.48 million hours
Low income trips to Central London: 485000 trips
High income trips to Central London: 544000 trips


(Should be equivalent to `RemoveActivity` removing all non-home activities with `policy_type='household'`, and `probability_level='person'`)

In [29]:
policy_remove_any_education = modify.RemoveActivity(
    ['work', 'leisure', 'shopping', 'health', 'education', 'escort'], 
    policy_type='household',
    probability_level='person',
    probability=0.01)

print_simple_stats(apply_policies(population, [policy_household_quarantine_per_person]))

Population total time at home: 35.93 million hours
Population total travel time: 18.13 million hours
Low income trips to Central London: 477000 trips
High income trips to Central London: 523000 trips


## Remove Higher Education

Remove all education activity for persons over age of 17

In [30]:
def age_condition_over_17(attribute_value):
    return attribute_value > 17

policy_remove_higher_education = modify.RemoveActivity(
    ['education'],
    probability=1, 
    policy_type='person',
    probability_level='person',
    attribute_conditions={'age': age_condition_over_17},
    attribute_strict_conditions=True)

In [31]:
print_simple_stats(apply_policies(population, [policy_remove_higher_education]))

Population total time at home: 36.53 million hours
Population total travel time: 17.38 million hours
Low income trips to Central London: 431000 trips
High income trips to Central London: 463000 trips


## Remove Education

Probabilistically remove education activities from a person

In [32]:
policy_remove_any_education = modify.RemoveActivity(
    ['education', 'escort'], 
    policy_type='household',
    probability_level='person',
    probability=0.95)

In [33]:
print_simple_stats(apply_policies(population, [policy_remove_any_education]))

Population total time at home: 41.78 million hours
Population total travel time: 14.65 million hours
Low income trips to Central London: 417000 trips
High income trips to Central London: 440000 trips


## Remove Leisure Activities

Remove all leisure activities

In [34]:
policy_remove_leisure = modify.RemoveActivity(
    ['leisure'], 
    policy_type='person',
    probability_level='person',
    probability=1)

In [35]:
print_simple_stats(apply_policies(population, [policy_remove_leisure]))

Population total time at home: 39.41 million hours
Population total travel time: 16.21 million hours
Low income trips to Central London: 477000 trips
High income trips to Central London: 525000 trips


## Remove Health Activities

Probabilistically remove **individual** health activities from a person

In [36]:
policy_remove_health = modify.RemoveActivity(
    ['health'], 
    policy_type='activity',
    probability_level='activity',
    probability=0.5)

In [37]:
print_simple_stats(apply_policies(population, [policy_remove_health]))

Population total time at home: 36.99 million hours
Population total travel time: 17.58 million hours
Low income trips to Central London: 489000 trips
High income trips to Central London: 538000 trips


## Unemployment/Furlough

Probabilistically remove all work activities from a person

In [38]:
policy_unemployment_and_furlough = modify.RemoveActivity(
    ['work'], 
    policy_type='person',
    probability_level='person',
    probability=0.1)

In [39]:
print_simple_stats(apply_policies(population, [policy_unemployment_and_furlough]))

Population total time at home: 35.16 million hours
Population total travel time: 18.43 million hours
Low income trips to Central London: 453000 trips
High income trips to Central London: 510000 trips


## Work from Home

Probabilistically remove all work activities from a person

In [40]:
policy_work_from_home = modify.RemoveActivity(
    ['work'], 
    policy_type='person',
    probability_level='person',
    probability=0.5)

In [41]:
print_simple_stats(apply_policies(population, [policy_work_from_home]))

Population total time at home: 37.23 million hours
Population total travel time: 16.59 million hours
Low income trips to Central London: 334000 trips
High income trips to Central London: 403000 trips


## Reduced Work Activity

Probabilistically remove **individual** work activities from a person

In [42]:
policy_reduced_work_activity = modify.RemoveActivity(
    ['work'], 
    policy_type='activity',
    probability_level='activity',
    probability=0.2)

In [43]:
print_simple_stats(apply_policies(population, [policy_reduced_work_activity]))

Population total time at home: 35.75 million hours
Population total travel time: 17.83 million hours
Low income trips to Central London: 432000 trips
High income trips to Central London: 472000 trips


## Remove Shopping 

Probabilistically remove **individual** shopping activities from a person

In [44]:
policy_remove_shopping = modify.RemoveActivity(
    ['shop'],
    policy_type='activity',
    probability_level='activity',
    probability=0.5)

In [45]:
print_simple_stats(apply_policies(population, [policy_remove_shopping]))

Population total time at home: 34.57 million hours
Population total travel time: 18.90 million hours
Low income trips to Central London: 495000 trips
High income trips to Central London: 545000 trips


## All together now!

In [46]:
all_together_pop = apply_policies(
    population, 
    [policy_household_quarantine_per_person, 
     policy_remove_higher_education, 
     policy_remove_any_education, 
     policy_remove_leisure, 
     policy_remove_health, 
     policy_unemployment_and_furlough, 
     policy_work_from_home, 
     policy_reduced_work_activity, 
     policy_remove_shopping])

print_simple_stats(all_together_pop)

Population total time at home: 55.34 million hours
Population total travel time: 6.97 million hours
Low income trips to Central London: 194000 trips
High income trips to Central London: 208000 trips
