In [6]:
import os
import pandas as pd
import random
from copy import deepcopy
from collections import defaultdict
import geopandas as gp
from matplotlib import pyplot as plt
from pam import read


## Load Data
Here we load simple travel diary data of London commuters. This is a very simple 0.1% sample of data about work and education commutes from the 2011 census. Because we're sharing this date - we've aggregated locations to borough level and randomized personal attributes - so don't get too excited about the results.

The data is available in `pam/notebooks/data/example_data`.

In [7]:
data_path = os.path.join("data", "example_data")
trips = pd.read_csv(os.path.join(data_path, 'example_travel_diaries.csv'))
attributes = pd.read_csv(os.path.join(data_path , 'example_attributes.csv'))
attributes.set_index('pid', inplace=True)

In [8]:
attributes.head()

Unnamed: 0_level_0,gender,job,occ,inc
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
census_0,female,work,white,low
census_1,female,work,white,low
census_2,male,work,blue,high
census_3,male,work,blue,low
census_4,male,work,blue,medium


In [9]:
trips.head()

Unnamed: 0,uid,pid,hid,seq,hzone,ozone,dzone,purp,mode,tst,tet,freq
0,0,census_0,census_0,0,Harrow,Harrow,Camden,work,pt,444,473,1000
1,1,census_0,census_0,1,Harrow,Camden,Harrow,work,pt,890,919,1000
2,2,census_1,census_1,0,Greenwich,Greenwich,Tower Hamlets,work,pt,507,528,1000
3,3,census_1,census_1,1,Greenwich,Tower Hamlets,Greenwich,work,pt,1065,1086,1000
4,4,census_2,census_2,0,Croydon,Croydon,Croydon,work,pt,422,425,1000


In [10]:
trips['purp'].unique(), trips['mode'].unique()

(array(['work', 'education'], dtype=object),
 array(['pt', 'car'], dtype=object))

## Imagine we had better data...

Conjure up additional data to aid demonstration.

We add:
- `age` to person attributes
- activities: 
    - `leisure`
    - `health`
    - `shopping`
    - `escort`

and we put people in shared households based on shared hzone (originally it's one person per household)

**Randomly.**

In [11]:
import numpy as np

# add age column to attributes
attributes['age'] = [int(a) for a in np.random.normal(40.5, 10, len(attributes))]

In [12]:
# add some extra activities to trips
zones = list(set(trips['hzone'].unique()) | set(trips['ozone'].unique()) | set(trips['dzone'].unique()))
purp = ['leisure', 'health', 'shopping', 'escort']
mode = ['pt', 'car', 'walk', 'bike']

def enrich_activities(group):
    new_group = pd.DataFrame(columns=group.columns)
    for idx in group.index:
        trip = group.loc[idx, :]
        try:
            next_start = group.loc[idx+1, :]['tst']
        except KeyError:
            next_start = 1439
        new_group = new_group.append(trip)
        # append a random activity
        activity_loc = random.choice(zones)
        activity_purp = random.choice(purp)
        activity_mode = random.choice(mode)

        act_times = [random.randint(trip['tet']+1, next_start-1) for i in range(4)]
        act_times.sort()

        new_group = new_group.append(pd.DataFrame(
            {'uid': ['{}_act_to'.format(trip['uid']), '{}_act_from'.format(trip['uid'])], 
             'pid': [trip['pid'], trip['pid']], 'hid': [trip['hid'], trip['hid']], 
             'seq': [0, 0], 'hzone': [trip['hzone'], trip['hzone']], 'ozone': [trip['dzone'], activity_loc], 
             'dzone': [activity_loc, trip['ozone']], 'purp': [activity_purp, activity_purp], 
             'mode': [activity_mode, activity_mode], 'tst': [act_times[0], act_times[2]], 
             'tet': [act_times[1], act_times[3]], 'freq': [trip['freq'], trip['freq']]}))
    new_group = new_group.reset_index(drop=True)
    new_group = new_group.drop(['seq'], axis=1)
    new_group = new_group.rename_axis('seq').reset_index()
    return new_group

new_trips = trips.groupby('pid').apply(enrich_activities).reset_index(drop=True)

In [13]:
# generate some households (as opposed one person to a household)
# bunch up people who share the same hzone
import uuid

def bunch_up_people(group):
    group['pid'].unique()
    households = []
    i = 1
    household= []
    h_size = random.randint(1,5)
    for p in group['pid'].unique():
        household.append(p)
        i += 1

        if i>h_size:
            households.append(household)
            h_size = random.randint(1,5)
            household= []
            i = 1

    return dict(zip([str(uuid.uuid4()) for i in range(len(households))], households))

In [14]:
households_map = {}
households_per_zone = new_trips.groupby('hzone').apply(bunch_up_people)
for idx in households_per_zone.index:
    households_map = {**households_map, **households_per_zone[idx]}

In [15]:
# ze old switcheroo person to household id
new_keys = []
new_values = []
for key, value in households_map.items():
    for item in value:
        new_keys.append(item)
        new_values.append(key.lower())
person_to_household_map = dict(zip(new_keys, new_values))

In [16]:
dict(list(person_to_household_map.items())[0:15])

{'census_1016': '862b4141-55b5-43d9-9548-a9ae350bfe85',
 'census_1043': '862b4141-55b5-43d9-9548-a9ae350bfe85',
 'census_1197': '862b4141-55b5-43d9-9548-a9ae350bfe85',
 'census_1247': '862b4141-55b5-43d9-9548-a9ae350bfe85',
 'census_1272': 'eb1e41dc-88a1-4c3b-9762-104113b7933f',
 'census_128': 'eb1e41dc-88a1-4c3b-9762-104113b7933f',
 'census_1281': 'eb1e41dc-88a1-4c3b-9762-104113b7933f',
 'census_1287': 'eb1e41dc-88a1-4c3b-9762-104113b7933f',
 'census_1375': '04dbf381-63df-4cba-affd-4a84a298767b',
 'census_1418': '04dbf381-63df-4cba-affd-4a84a298767b',
 'census_1420': '04dbf381-63df-4cba-affd-4a84a298767b',
 'census_1536': '9308ca3a-4b77-4746-9201-c40198a2e48b',
 'census_1665': '31517a7f-0b7d-4b59-894c-18b72bcbcd72',
 'census_1670': '31517a7f-0b7d-4b59-894c-18b72bcbcd72',
 'census_1690': 'e9e387f5-4c41-49b1-9d86-b112cc63653b'}

In [17]:
new_trips['hid'] = new_trips['pid'].map(person_to_household_map)

## Input data after changes 

In [18]:
attributes.head()

Unnamed: 0_level_0,gender,job,occ,inc,age
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
census_0,female,work,white,low,36
census_1,female,work,white,low,46
census_2,male,work,blue,high,49
census_3,male,work,blue,low,31
census_4,male,work,blue,medium,43


In [19]:
new_trips.head(10)

Unnamed: 0,seq,uid,pid,hid,hzone,ozone,dzone,purp,mode,tst,tet,freq
0,0,0,census_0,165ffd84-c6fa-4d38-83c7-de0d2c1fc903,Harrow,Harrow,Camden,work,pt,444,473,1000
1,1,0_act_to,census_0,165ffd84-c6fa-4d38-83c7-de0d2c1fc903,Harrow,Camden,Kingston upon Thames,escort,bike,616,797,1000
2,2,0_act_from,census_0,165ffd84-c6fa-4d38-83c7-de0d2c1fc903,Harrow,Kingston upon Thames,Harrow,escort,bike,845,872,1000
3,3,1,census_0,165ffd84-c6fa-4d38-83c7-de0d2c1fc903,Harrow,Camden,Harrow,work,pt,890,919,1000
4,4,1_act_to,census_0,165ffd84-c6fa-4d38-83c7-de0d2c1fc903,Harrow,Harrow,Kingston upon Thames,escort,car,1094,1168,1000
5,5,1_act_from,census_0,165ffd84-c6fa-4d38-83c7-de0d2c1fc903,Harrow,Kingston upon Thames,Camden,escort,car,1423,1438,1000
6,0,2,census_1,efb0b187-c6e6-4c1f-abe1-c27079234905,Greenwich,Greenwich,Tower Hamlets,work,pt,507,528,1000
7,1,2_act_to,census_1,efb0b187-c6e6-4c1f-abe1-c27079234905,Greenwich,Tower Hamlets,Haringey,escort,car,642,729,1000
8,2,2_act_from,census_1,efb0b187-c6e6-4c1f-abe1-c27079234905,Greenwich,Haringey,Greenwich,escort,car,740,960,1000
9,3,3,census_1,efb0b187-c6e6-4c1f-abe1-c27079234905,Greenwich,Tower Hamlets,Greenwich,work,pt,1065,1086,1000


In [20]:
new_trips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17556 entries, 0 to 17555
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   seq     17556 non-null  int64 
 1   uid     17556 non-null  object
 2   pid     17556 non-null  object
 3   hid     17316 non-null  object
 4   hzone   17556 non-null  object
 5   ozone   17556 non-null  object
 6   dzone   17556 non-null  object
 7   purp    17556 non-null  object
 8   mode    17556 non-null  object
 9   tst     17556 non-null  object
 10  tet     17556 non-null  object
 11  freq    17556 non-null  object
dtypes: int64(1), object(11)
memory usage: 1.6+ MB


In [21]:
new_trips['purp'].unique(), new_trips['mode'].unique()

(array(['work', 'escort', 'health', 'leisure', 'shopping', 'education'],
       dtype=object),
 array(['pt', 'bike', 'car', 'walk'], dtype=object))

In [22]:
def print_simple_stats(population):
    """
    Print some simple population statistics.
    """
    time_at_home = 0
    travel_time = 0 
    low_income_central_trips = 0
    high_income_central_trips = 0
    
    for hid, hh in population.households.items():
        for pid, person in hh.people.items():
            freq = person.freq
            
            for p in person.plan:
                if p.act == 'travel':
                    duration = p.duration.seconds * freq / 3600
                    travel_time += duration
                    
                    if p.end_location.area == "Westminster,City of London":
                        if person.attributes['inc'] == "low":
                            low_income_central_trips += freq
                            
                        elif person.attributes['inc'] == "high":
                            high_income_central_trips += freq
                    
                else:  # activity
                    if p.act == 'home':
                        duration = p.duration.seconds * freq / 3600
                        time_at_home += duration
                        
    print(f"Population total time at home: {time_at_home/1000000:.2f} million hours")
    print(f"Population total travel time: {travel_time/1000000:.2f} million hours")
    print(f"Low income trips to Central London: {low_income_central_trips} trips")
    print(f"High income trips to Central London: {high_income_central_trips} trips")

## Create the population

In [60]:
population = read.load_travel_diary(new_trips, attributes, trip_freq_as_person_freq=True)

Using tour based purpose parser using (recommended)
Using freq of 'None' for all trips.


In [61]:
rando_hh_id = list(households_map.keys())[0]
rando_pid_in_hh = list(population.households[rando_hh_id].people)[0]

In [62]:
population.households[rando_hh_id].people

{'census_1016': <pam.core.Person at 0x11e12ab00>,
 'census_1043': <pam.core.Person at 0x11e12ac50>,
 'census_1197': <pam.core.Person at 0x11eb06518>,
 'census_1247': <pam.core.Person at 0x11eb1cfd0>}

In [63]:
population.households[rando_hh_id].people[rando_pid_in_hh].plan.print()

0:	Activity(0 act:home, location:Barking and Dagenham, time:00:00:00 --> 07:21:00, duration:7:21:00)
1:	Leg(0 mode:car, area:Barking and Dagenham --> Barking and Dagenham, time:07:21:00 --> 07:30:00, duration:0:09:00)
2:	Activity(1 act:work, location:Barking and Dagenham, time:07:30:00 --> 09:35:00, duration:2:05:00)
3:	Leg(1 mode:walk, area:Barking and Dagenham --> Croydon, time:09:35:00 --> 13:06:00, duration:3:31:00)
4:	Activity(2 act:escort, location:Croydon, time:13:06:00 --> 14:19:00, duration:1:13:00)
5:	Leg(2 mode:walk, area:Croydon --> Barking and Dagenham, time:14:19:00 --> 15:13:00, duration:0:54:00)
6:	Activity(3 act:work, location:Barking and Dagenham, time:15:13:00 --> 16:02:00, duration:0:49:00)
7:	Leg(3 mode:car, area:Barking and Dagenham --> Barking and Dagenham, time:16:02:00 --> 16:11:00, duration:0:09:00)
8:	Activity(4 act:home, location:Barking and Dagenham, time:16:11:00 --> 17:14:00, duration:1:03:00)
9:	Leg(4 mode:walk, area:Barking and Dagenham --> Westminster,

In [64]:
person = population.households[rando_hh_id].people[rando_pid_in_hh]

In [65]:
len(list(person.activities))

7

In [66]:
population.households[rando_hh_id].people[rando_pid_in_hh].attributes

{'gender': 'other', 'job': 'work', 'occ': 'white', 'inc': 'low', 'age': 37}

In [67]:
print_simple_stats(population)

Population total time at home: 33.12 million hours
Population total travel time: 19.14 million hours
Low income trips to Central London: 504000 trips
High income trips to Central London: 537000 trips


# PAM Simple Policies

Based on [link](https://docs.google.com/spreadsheets/d/1FQMa7dLe2cv1NEZnbu5cZo3v07tKXINwvOaLQYoEp-M/edit#gid=0)

In [68]:
from pam.policy import *

## Household Quarantine

Probabilistically apply quarantine to a household (remove all activities - stay at home) 

### Household-based

If you have a probability of a household having to be quarantined

In [69]:
policy_household_quarantine_per_household = \
    HouseholdQuarantined(
        HouseholdProbability(0.01)
    )

In [70]:
print_simple_stats(apply_policies(population, [policy_household_quarantine_per_household]))

Population total time at home: 33.03 million hours
Population total travel time: 19.07 million hours
Low income trips to Central London: 504000 trips
High income trips to Central London: 537000 trips


### Person-based

If you have a probability of any one person living in the household having to be quarantined.

The probability of the household being quarantined is then $1 - (1-P)^n$, where $P$ is the probability any one person being quarantined and $n$ is the number of people in the household; $(1-P)^n$ is the probability of no one having to be quarantined.

In [71]:
policy_household_quarantine_per_person = \
    HouseholdQuarantined(
        PersonProbability(0.01)
    )

In [72]:
print_simple_stats(apply_policies(population, [policy_household_quarantine_per_person]))

Population total time at home: 32.22 million hours
Population total travel time: 18.61 million hours
Low income trips to Central London: 491000 trips
High income trips to Central London: 520000 trips


(Should be equivalent to `RemoveActivity` removing all non-home activities with `policy_type='household'`, and `probability_level='person'`)

In [73]:
policy_remove_any_education = \
    RemoveHouseholdActivities(
        ['work', 'leisure', 'shopping', 'health', 'education', 'escort'], 
        PersonProbability(0.01)
)

print_simple_stats(apply_policies(population, [policy_household_quarantine_per_person]))

Population total time at home: 31.93 million hours
Population total travel time: 18.39 million hours
Low income trips to Central London: 494000 trips
High income trips to Central London: 514000 trips


## Remove Higher Education

Remove all education activity for persons over age of 17

In [74]:
def age_condition_over_17(attribute_value):
    return attribute_value > 17

policy_remove_higher_education = \
    RemovePersonActivities(
        ['education'],
        1,
        PersonAttributeFilter({'age': age_condition_over_17}, how='all')
)

In [75]:
print_simple_stats(apply_policies(population, [policy_remove_higher_education]))

Population total time at home: 35.40 million hours
Population total travel time: 17.97 million hours
Low income trips to Central London: 472000 trips
High income trips to Central London: 493000 trips


## Remove Education

Probabilistically remove education activities from a person

In [76]:
policy_remove_any_education = \
    RemoveHouseholdActivities(
        ['education', 'escort'], 
        PersonProbability(0.95)
)

In [77]:
print_simple_stats(apply_policies(population, [policy_remove_any_education]))

Population total time at home: 40.75 million hours
Population total travel time: 14.81 million hours
Low income trips to Central London: 402000 trips
High income trips to Central London: 433000 trips


#### Affecting different activities

You can choose to remove different activities to the ones that carry a probability. Here, education activities affect removal of both education and escort on a household level.

In [78]:
policy_remove_any_education = \
    RemoveHouseholdActivities(
        ['education', 'escort'], 
        ActivityProbability(['education'], 0.95)
)

In [79]:
print_simple_stats(apply_policies(population, [policy_remove_any_education]))

Population total time at home: 38.02 million hours
Population total travel time: 16.33 million hours
Low income trips to Central London: 435000 trips
High income trips to Central London: 458000 trips


## Remove Leisure Activities

Remove all leisure activities

In [80]:
policy_remove_leisure = \
    RemovePersonActivities(
        ['leisure'], 
        1
)

In [81]:
print_simple_stats(apply_policies(population, [policy_remove_leisure]))

Population total time at home: 38.37 million hours
Population total travel time: 16.13 million hours
Low income trips to Central London: 444000 trips
High income trips to Central London: 478000 trips


## Remove Health Activities

Probabilistically remove **individual** health activities from a person

In [82]:
policy_remove_health = \
    RemoveIndividualActivities(
        ['health'], 
        0.5
)

In [83]:
print_simple_stats(apply_policies(population, [policy_remove_health]))

Population total time at home: 35.77 million hours
Population total travel time: 17.43 million hours
Low income trips to Central London: 468000 trips
High income trips to Central London: 502000 trips


## Unemployment/Furlough

Probabilistically remove all work activities from a person

In [84]:
policy_unemployment_and_furlough = \
    RemovePersonActivities(
        ['work'], 
        0.1
)

In [85]:
print_simple_stats(apply_policies(population, [policy_unemployment_and_furlough]))

Population total time at home: 33.98 million hours
Population total travel time: 18.72 million hours
Low income trips to Central London: 489000 trips
High income trips to Central London: 519000 trips


## Work from Home

Probabilistically remove all work activities from a person

In [86]:
policy_work_from_home = \
    RemovePersonActivities(
        ['work'], 
        0.5
)

In [87]:
print_simple_stats(apply_policies(population, [policy_work_from_home]))

Population total time at home: 37.48 million hours
Population total travel time: 16.97 million hours
Low income trips to Central London: 409000 trips
High income trips to Central London: 443000 trips


## Reduced Work Activity

Probabilistically remove **individual** work activities from a person

In [88]:
policy_reduced_work_activity = \
    RemovePersonActivities(
        ['work'], 
        ActivityProbability(['work'], 0.2)
)

In [89]:
print_simple_stats(apply_policies(population, [policy_reduced_work_activity]))

Population total time at home: 35.49 million hours
Population total travel time: 17.92 million hours
Low income trips to Central London: 466000 trips
High income trips to Central London: 498000 trips


## Remove Shopping 

Probabilistically remove **individual** shopping activities from a person

In [90]:
policy_remove_shopping = \
    RemoveIndividualActivities(
        ['shop'], 
        0.5
)

In [91]:
print_simple_stats(apply_policies(population, [policy_remove_shopping]))

Population total time at home: 33.12 million hours
Population total travel time: 19.14 million hours
Low income trips to Central London: 504000 trips
High income trips to Central London: 537000 trips


## Move Shopping to home location

Probabilistically move shopping activity tours to person's home location

In [92]:
policy_move_shopping = \
    MovePersonActivitiesToHome(
        ['shop'], 
        0.5
)

In [93]:
print_simple_stats(apply_policies(population, [policy_move_shopping]))

Population total time at home: 33.12 million hours
Population total travel time: 19.14 million hours
Low income trips to Central London: 504000 trips
High income trips to Central London: 537000 trips


## Reduce Shopping activities

Probabilistically shopping activities from all but one (randomly chosen) person in household

In [94]:
policy_reduce_shopping = \
    ReduceSharedHouseholdActivities(
        ['shop'], 
        0.5
)

In [95]:
print_simple_stats(apply_policies(population, [policy_reduce_shopping]))

Population total time at home: 33.12 million hours
Population total travel time: 19.14 million hours
Low income trips to Central London: 504000 trips
High income trips to Central London: 537000 trips


## All together now!

In [96]:
all_together_pop = apply_policies(
    population, 
    [policy_household_quarantine_per_person, 
     policy_reduce_shopping,
     policy_move_shopping,
     policy_remove_higher_education, 
     policy_remove_any_education, 
     policy_remove_leisure, 
     policy_remove_health, 
     policy_unemployment_and_furlough, 
     policy_work_from_home, 
     policy_reduced_work_activity, 
     policy_remove_shopping])

print_simple_stats(all_together_pop)

Population total time at home: 46.11 million hours
Population total travel time: 9.97 million hours
Low income trips to Central London: 237000 trips
High income trips to Central London: 262000 trips
