In [41]:
import pandas as pd
import yaml
import numpy as np
import random

# Agents

In [2]:
# The yaml file has the following tags:
# subculture_id: Subculture A | B | C
# neighbourhood_id: string - needs to map onto scenario yaml
# commute_length: Categorical: LocalCommute | CityCommute | DistantCommute
# commute_length_continuous: float
# weather_sensitivity: float
# consistency: float
# social_connectivity: float
# subculture_connectivity: float
# neighbourhood_connectivity: float
# average_weight: float
# habit: [nested]
# # Walk: 1.0  | Car: 1.0  | PublicTransport: 1.0  | Cycle: 1.0 
# current_mode: Categorical: Walk | Car | PublicTransport | Cycle
# last_mode: Categorical: Walk | Car | PublicTransport | Cycle
# norm: Categorical: Walk | Car | PublicTransport | Cycle
# owns_bike: Boolean
# owns_car: Boolean

In [3]:
# To write the yaml, the data needs to be structured as a dictionary.
# Therefore we need a function that ingests a row of pandas data and returns a dictionary marked up as above.

def yaml_dict(row,subculture_id, neighbourhood_id, commute_length, commute_length_continuous, weather_sensitivity,
              consistency, social_connectivity, subculture_connectivity, neighbourhood_connectivity, average_weight,
              habit, current_mode, last_mode, norm, owns_bike, owns_car):
    """Build the yaml-ready dictionary describing a single agent.

    Parameters
    ----------
    row
        One agent record (a pandas dataframe row); values are looked up with ``row[label]``.
    subculture_id, neighbourhood_id, ..., owns_car
        For each output key of the same name, the label under which its value is found in ``row``.

    Returns
    -------
    dict
        One entry per agent tag. Numeric tags are cast to ``float`` and the ownership
        flags to ``bool`` because pyyaml doesn't like numpy numerical objects; the
        categorical tags are passed through unchanged. ``habit`` is a one-item mapping
        from the value found under the ``habit`` label to 1.0.
    """
    return {
        # Categorical identifiers are copied as they are.
        'subculture_id': row[subculture_id],
        'neighbourhood_id': row[neighbourhood_id],
        'commute_length': row[commute_length],
        # Numeric fields: cast to base python floats.
        'commute_length_continuous': float(row[commute_length_continuous]),
        'weather_sensitivity': float(row[weather_sensitivity]),
        'consistency': float(row[consistency]),
        'social_connectivity': float(row[social_connectivity]),
        'subculture_connectivity': float(row[subculture_connectivity]),
        'neighbourhood_connectivity': float(row[neighbourhood_connectivity]),
        'average_weight': float(row[average_weight]),
        # Nested mapping: the looked-up mode gets a habit weight of 1.0.
        'habit': {row[habit]: 1.0},
        'current_mode': row[current_mode],
        'last_mode': row[last_mode],
        'norm': row[norm],
        # Ownership flags: cast to base python booleans.
        'owns_bike': bool(row[owns_bike]),
        'owns_car': bool(row[owns_car]),
    }

In [4]:
# Load agent data; the first CSV column is used as the index.
# Forward slashes resolve on every platform (Windows included) and keep backslash
# escape sequences such as '\D' and '\S' out of the string literal.
agents = pd.read_csv('../Data/Synthetic_WF_complete.csv', index_col=0)
agents.head()

Unnamed: 0,index,pidp,sex,age_6cat,eth_5cat,employ,worktrav,caruse,cycleuse,f_workdis,ward,work_dis_class,workdis,sim_dist,trav_class,trav_classnm
0,0,22445,f,25-34,White,Employed,Public,Car,Bike,5,E05000590,1.0,5,3492.721765,0.0,local
1,0,22445,f,25-34,White,Employed,Public,Car,Bike,5,E05000590,1.0,5,12498.866719,1.0,city
2,0,22445,f,25-34,White,Employed,Public,Car,Bike,5,E05000590,1.0,5,12411.0324,1.0,city
3,1,29925,f,35-44,White,SelfEmployed,Car,Car,NoBike,3,E05000590,1.0,3,13794.974969,1.0,city
4,1,29925,f,35-44,White,SelfEmployed,Car,Car,NoBike,3,E05000590,1.0,3,17946.035446,1.0,city


In [5]:
# Subset the data so that we just have commuters (rows whose 'worktrav' is one of the four modes).
# .copy() makes the subset an independent frame, so the columns added in later cells are written
# to `agents` itself rather than to a slice of the loaded data (avoids SettingWithCopyWarning).
agents = agents[agents['worktrav'].isin(['Walk','Car','Public','Cycle'])].copy()
len(agents)

111166

In [18]:
# Recode a few source columns into the labels used in the yaml.
commute_labels = {'local': 'LocalCommute', 'city': 'CityCommute'}

# 'Public' becomes 'PublicTransport'; every other mode label is kept as it is.
agents['mode'] = agents['worktrav'].where(agents['worktrav'] != 'Public', 'PublicTransport')
# Anything that is neither 'local' nor 'city' is labelled as a distant commute.
agents['commute'] = agents['trav_classnm'].map(lambda label: commute_labels.get(label, 'DistantCommute'))
# Ownership flags are True only for an exact 'Bike' / 'Car' entry.
agents['owns_bike'] = agents['cycleuse'] == 'Bike'
agents['owns_car'] = agents['caruse'] == 'Car'

In [28]:
# We also need to generate some psychological variables.

# Seeded generator so that re-running the notebook reproduces the same synthetic values;
# change the seed to obtain a different draw.
agent_rng = np.random.default_rng(42)
n_agents = len(agents)

# Subculture - random tertiles.
# Group sizes are derived from the number of agents (any remainder goes to Subculture C),
# so the label array always has exactly one entry per agent.
tertile = n_agents // 3
sc = np.repeat(['Subculture A', 'Subculture B', 'Subculture C'],
               [tertile, tertile, n_agents - 2 * tertile])
agent_rng.shuffle(sc)
agents['subculture_id'] = sc

# weather sensitivity - random uniform 0-1
agents['weather_sensitivity'] = agent_rng.uniform(size=n_agents)

# consistency - constant 1.0
agents['consistency'] = np.ones(n_agents)

# social connectivity - constant 0.7
agents['social_connectivity'] = np.full(n_agents, 0.7)

# subculture connectivity - constant 0.5
agents['subculture_connectivity'] = np.full(n_agents, 0.5)

# neighbourhood connectivity - constant 0.3
agents['neighbourhood_connectivity'] = np.full(n_agents, 0.3)

# Not sure what this represents - the same constant for every agent.
# NOTE(review): the value looks like 2/11 stored in single precision - confirm where it comes from.
agents['average_weight'] = np.full(n_agents, 0.1818181872367859)


In [115]:
# Transform the relevant agent data into yaml-appropriate dictionaries, one per agent row.
# Keys are yaml_dict's parameter names; values are the `agents` columns they are read from.
agent_columns = {
    'subculture_id': 'subculture_id',
    'neighbourhood_id': 'ward',
    'commute_length': 'commute',
    'commute_length_continuous': 'sim_dist',
    'weather_sensitivity': 'weather_sensitivity',
    'consistency': 'consistency',
    'social_connectivity': 'social_connectivity',
    'subculture_connectivity': 'subculture_connectivity',
    'neighbourhood_connectivity': 'neighbourhood_connectivity',
    'average_weight': 'average_weight',
    # habit, current mode, last mode and norm are all read from the recoded 'mode' column
    'habit': 'mode',
    'current_mode': 'mode',
    'last_mode': 'mode',
    'norm': 'mode',
    'owns_bike': 'owns_bike',
    'owns_car': 'owns_car',
}
# apply forwards the extra keyword arguments to yaml_dict for every row.
yaml_data = agents.apply(yaml_dict, axis=1, **agent_columns).tolist()

In [123]:
# Dump the list of agent dictionaries to '1.yaml'.
# (Takes several minutes for a file of c. 100,000 agents.)
with open('1.yaml', 'w') as agents_file:
    yaml.dump(yaml_data, stream=agents_file, explicit_start=True, default_flow_style=False)

# Networks

In [72]:
# Social networks could be specifically generated here.
# For now, though, Motivate can generate a preferential-attachment-based network.
# Just run on the command line with motivate.exe --generate

# Parameters

In [6]:
# Fixed model parameters, collected in one dictionary ready to be written to yaml.
params = {
    'total_years': 5,
    # population size follows the agents table
    'number_of_people': len(agents),
    'number_of_simulations': 1,
    'social_connectivity': 0.7,
    'subculture_connectivity': 0.5,
    'neighbourhood_connectivity': 0.3,
    'number_of_social_network_links': 5,
    'number_of_neighbour_links': 10,
    'days_in_habit_average': 10,
    'distributions': [],
}

In [7]:
# Dump the parameters dictionary to 'parameters.yaml'.
with open('parameters.yaml', 'w') as params_file:
    yaml.dump(params, stream=params_file, explicit_start=True, default_flow_style=False)

# Scenario

In [126]:
# The scenario file contains information on:
# Model ID
# subculture definitions
# neighbourhood definitions
# number of bikes
# number of cars
# intervention

In [178]:
# Scenario skeleton: the simple keys get their values here, the list / dict entries start empty.
name = 'basic'

# Bike and car totals are the sums of the agents' ownership flags;
# int() hands pyyaml a plain python integer.
num_bike, num_car = (int(agents[flag].sum()) for flag in ('owns_bike', 'owns_car'))

yaml_scenario = dict(
    id=name,
    subcultures=[],
    neighbourhoods=[],
    number_of_bikes=num_bike,
    number_of_cars=num_car,
    intervention={},
)

In [179]:
# Set up subcultures.
# The ids should map onto the subculture names specified in the agents data.
# Each subculture carries a 'desirability' value per transport mode.

# Subculture A
scA = {'id':'Subculture A','desirability':{'PublicTransport': 0.5,'Walk': 0.7,'Car': 0.8, 'Cycle': 0.9}}

# Subculture B
scB = {'id':'Subculture B','desirability':{'PublicTransport': 0.8,'Walk': 0.7,'Car': 0.9, 'Cycle': 0.6}}

# Subculture C
scC = {'id':'Subculture C','desirability':{'PublicTransport': 0.5,'Walk': 0.9,'Car': 0.4, 'Cycle': 0.9}}

# Assign the complete list in one step (rather than appending one by one) so that
# re-running this cell cannot add the same subcultures to the scenario a second time.
yaml_scenario['subcultures'] = [scA, scB, scC]

In [180]:
# Set up neighbourhoods.
# The ids should map onto the neighbourhood names established in the agents.

# Get the required neighbourhood ids (ward codes).
nh = agents['ward'].unique()

# Capacities for walking and cycling are assumed to be effectively unlimited.
UNLIMITED_CAPACITY = 150000

# Car and Public Transport capacities; entry i is used for the i-th ward in `nh`.
car = [3725, 2084, 6605, 2375, 1079, 1095, 2460, 1897, 5703, 2503, 4848, 3700, 1229, 6110, 6890, 709, 1828, 3203, 4050, 6243]
pt = [5548, 87, 6856, 5166, 1474, 2078, 5765, 3502, 458, 7467, 2810, 3065, 5054, 181, 3565, 4880, 5221, 6907, 3957, 3026]

# Fail early with a clear message instead of an IndexError part-way through the loop
# when the agents contain more wards than there are capacities defined.
if len(nh) > min(len(car), len(pt)):
    raise ValueError(
        'Found {} wards but only {} car and {} public transport capacities are defined'.format(
            len(nh), len(car), len(pt)))

# We'll set up supportiveness as random draws from a uniform distribution.
# Seeded generator so that re-running the cell reproduces the same scenario;
# change the seed to obtain a different draw.
nhood_rng = np.random.default_rng(42)

nhoods = []
for n, car_capacity, pt_capacity in zip(nh, car, pt):
    nhoods.append({
        'id': n,
        # float() gives pyyaml base python numbers rather than numpy objects
        'supportiveness': {mode: float(nhood_rng.uniform())
                           for mode in ('Car', 'Cycle', 'Walk', 'PublicTransport')},
        'capacity': {
            'Car': car_capacity,
            'Cycle': UNLIMITED_CAPACITY,
            'Walk': UNLIMITED_CAPACITY,
            'PublicTransport': pt_capacity,
        },
    })

yaml_scenario['neighbourhoods'] = nhoods

In [181]:
# Set up intervention

# For the basic scenario we have no interventions: day 0, no changes of any kind.
# Every key appears exactly once - a key repeated inside a dict literal is silently
# overwritten by its last occurrence, which hides mistakes.
intervention = {'day': 0,'subculture_changes':[],'change_in_number_of_bikes':0,'change_in_number_of_cars':0,
               'neighbourhood_changes':[]}

yaml_scenario['intervention'] = intervention

# e.g. neighbourhood changes
# [{'id': '9', 'increase_in_capacity': {'Car': -5000}}, {'id': '4', 'increase_in_capacity': {'Car': -1000}}],

In [186]:
# Dump the complete scenario dictionary (which includes the intervention) to 'scenario.yaml'.
with open('scenario.yaml', 'w') as scenario_file:
    yaml.dump(yaml_scenario, stream=scenario_file, explicit_start=True, default_flow_style=False)