# Generates Mobility file for inference

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
if '..' not in sys.path:
    sys.path.append('..')
    
from matplotlib import pyplot as plt
%matplotlib inline

import pandas as pd
import numpy as np
import networkx as nx
import copy
import scipy as sp
import math
import seaborn
import pickle
import warnings
import os

from lib.mobilitysim import MobilitySimulator
from lib.town_data import generate_population, generate_sites, compute_distances
from lib.town_maps import MapIllustrator

### Settings for synthetic mobility data generation

In [3]:
# Factor by which the real population counts are divided
downsample_population = 20
# Factor by which the number of extracted sites is divided
downsample_sites = 10

# Population generation mode; one of: 'custom' | 'random' | 'heuristic'
population_by = 'custom'

### Town details

In [4]:
population_path = 'lib/data/population/'  # Directory containing FB population density files
sites_path = 'lib/data/queries/'  # Directory containing OSM site files
bbox = (48.4900, 48.5485, 9.0224, 9.1061)  # Coordinate bounding box

# Population per age group in Landkreis Tübingen
landkreis_population_per_age_group = np.array([
    13416,  # 0-4
    18324,  # 5-14
    67389,  # 15-34
    75011,  # 35-59
    41441,  # 60-79
    11750])  # 80+

# Household structure; handed to `generate_population` in the 'custom' mode
household_info = {
    'size_dist' : [41.9, 33.8, 11.9, 9.1, 3.4], # distribution of household sizes (1-5 people)
    'soc_role' : {
        'children' : [1, 1, 3/20, 0, 0, 0], # age groups 0,1,2 can be children 
        'parents' : [0, 0, 17/20, 1, 0, 0], # age groups 2,3 can be parents
        'elderly' : [0, 0, 0, 0, 1, 1] # age groups 4,5 are elderly
    }
}

# Head counts used to rescale from the Landkreis to the town.
# 227331 equals the sum of the six age-group counts above.
# NOTE(review): 90546 is presumably the population of the Town of Tübingen -- confirm the source.
town_population = 90546
landkreis_population = 227331

# Downsample population to Town of Tübingen (to be consistent with case data),
# then shrink further by `downsample_population`
population_per_age_group = np.round(
    landkreis_population_per_age_group * town_population
    / (downsample_population * landkreis_population)).astype('int').tolist()

print(f'Population per age group: {population_per_age_group}')

Population per age group: [267, 365, 1342, 1494, 825, 234]


### Extracted site data

* site_loc: list of site coordinates
* site_type: list of site categories (one per site)
* site_dict: helper dictionary mapping each site category (int) to its real name (string)
* density_site_loc: list of coordinates of sites of a specific type, used as the basis for generating the population density

To generate sites of arbitrary types for a given city, the following function sends queries to OpenStreetMap. In order to use it for additional types of sites, you need to specify queries in the Overpass API format. For more information, check the existing queries in **/lib/data/queries/**, https://wiki.openstreetmap.org/wiki/Overpass_API and http://overpass-turbo.eu/.

We separately use a query returning all buildings in a town to heuristically generate the population density in the next steps if no real population density data is provided. An extra query is required for this purpose and it should be given as the **site_based_density_file** argument.

In [5]:
# This block sends queries to OpenStreetMap
# Make sure you have a working internet connection
# If an error occurs during execution, try executing again

# Collect every query file (.txt) below `sites_path`, except the buildings query,
# which is passed separately as `site_based_density_file`.
# NOTE(review): os.walk yields files in arbitrary order; if generate_sites numbers the
# site types by query order, the ids may differ between machines -- confirm.
site_files = []
for root, dirs, files in os.walk(sites_path):
    for f in files:
        if f.endswith(".txt") and f != 'buildings.txt':
            # join with `root` rather than `sites_path` so that files found in
            # subdirectories resolve to an existing path
            site_files.append(os.path.join(root, f))

site_loc, site_type, site_dict, density_site_loc = generate_sites(
    bbox=bbox, query_files=site_files,
    site_based_density_file='lib/data/queries/buildings.txt')

Query 1 OK.
Query 2 OK.
Query 3 OK.
Query 4 OK.
Query 5 OK.
Query 6 OK.


### Site visualization

In [6]:
# Build the site map; the trailing expression shows it as the cell output
ill = MapIllustrator()
sitemap = ill.sites_map(
    bbox=bbox,
    site_loc=site_loc,
    site_type=site_type,
    site_dict=site_dict,
    map_name='site_distribution',
)
sitemap

### Generate home location based on various options

* home_loc: list of home coordinates
* people_age: list of age categories (one per person)
* home_tile: list of the map tile to which each home belongs
* tile_loc: list of tile center coordinates

The following three options generate a population distribution across a geographical area consisting of tiles (square boxes) of specific resolution. More information about tile sizes can be found in https://wiki.openstreetmap.org/wiki/Zoom_levels. 

In [7]:
# Collect the population density files (.csv) found below `population_path`
density_files = []
for root, dirs, files in os.walk(population_path):
    for f in files:
        if f.endswith(".csv"):
            # join with `root` rather than `population_path` so that files found in
            # subdirectories resolve to an existing path
            density_files.append(os.path.join(root, f))

if population_by == 'custom':
    # generate population across tiles based on density input
    home_loc, people_age, home_tile, tile_loc, people_household = generate_population(
        density_files=density_files, bbox=bbox,
        population_per_age_group=population_per_age_group, household_info=household_info,
        tile_level=16, seed=42)

elif population_by == 'random':
    # generate population across tiles uniformly at random
    home_loc, people_age, home_tile, tile_loc, people_household = generate_population(
        bbox=bbox, population_per_age_group=population_per_age_group,
        tile_level=16, seed=42)

elif population_by == 'heuristic':
    # generate population across tiles proportional to buildings per tile
    home_loc, people_age, home_tile, tile_loc, people_household = generate_population(
        bbox=bbox, density_site_loc=density_site_loc,
        population_per_age_group=population_per_age_group, tile_level=16, seed=42)

else:
    # Fail here instead of with a NameError on `home_loc` in a later cell
    raise ValueError(
        f"Unknown population_by={population_by!r}; expected 'custom', 'random' or 'heuristic'")

### Home visualization

In [9]:
# Build the home-location map; the trailing expression shows it as the cell output
homemap = ill.population_map(
    bbox=bbox,
    home_loc=home_loc,
    map_name='population_distribution',
)
homemap

Downsample sites as given by settings

In [10]:
# NOTE: this cell overwrites `site_loc` / `site_type`; running it twice without
# re-running the site extraction cell downsamples the sites a second time.
if downsample_sites > 1:
    np.random.seed(42)
    # downsample sites like population
    n_sites = len(site_loc)
    # The uniform `p` is kept on purpose: dropping it is believed to switch
    # np.random.choice to a different sampling path, changing the sites drawn
    # for this seed.
    idx = np.random.choice(n_sites, size=int(n_sites / downsample_sites),
                           replace=False, p=np.ones(n_sites) / n_sites)

    site_loc, site_type = np.array(site_loc)[idx].tolist(), np.array(site_type)[idx].tolist()

In [11]:
# Report how many sites remain and which type ids exist
n_sites_left = len(site_loc)
print('Number of sites: ', n_sites_left)
print('Site types:      ', site_dict)

Number of sites:  94
Site types:       {0: 'bus_stop', 1: 'education', 2: 'office', 3: 'social', 4: 'supermarket'}


Compute pairwise distances between all tile centers and all sites

In [12]:
# Distances between the sites and the tile centers; later passed to the
# simulator as `tile_site_dist`.
# NOTE(review): the layout of the result is defined by compute_distances in
# lib.town_data, which is not visible here.
tile_site_dist = compute_distances(site_loc, tile_loc)

### Specify synthetic mobility patterns

Here we specify the patterns of mobility used for generating the synthetic traces based on the above home and site locations. Note that this is a general framework and can be arbitrarily extended to any desired site numbers or types. See below for an example used in the first version of our paper.

Specify the mean duration of visit per type, or in reality, time spent in crowded places per type.

In [13]:
# 2h at office-education, 1.5h at restaurants/bars, 0.5 at supermarket, 0.2 at bus stop.
# One entry per site type.
# NOTE(review): the values read as [education, social, bus_stop, office, supermarket],
# whereas the site_dict printed in this notebook is
# {0: bus_stop, 1: education, 2: office, 3: social, 4: supermarket}.
# Confirm that the position in this list matches the type ids produced by generate_sites.
dur_mean_per_type = [2, 1.5, 0.2, 2, 0.5]

Determine the number of discrete sites a person visits per site type.

In [14]:
# 1 office, 1 school, 10 social, 2 supermarkets, 5 bus stops
# One entry per site type.
# NOTE(review): the values read as [education, social, bus_stop, office, supermarket],
# whereas the site_dict printed in this notebook is
# {0: bus_stop, 1: education, 2: office, 3: social, 4: supermarket}.
# Confirm that the position in this list matches the type ids produced by generate_sites.
variety_per_type = [1, 10, 5, 1, 2]

Set the number of visits per week that each group makes per type of site

In [15]:
# Visits per week, one row per age group and one column per site type.
# e.g. row 0 (age 0-4): no office, a lot of education (kindergarten), some social,
# no supermarket, no public transport
visits_per_week = np.array([
    [5, 1, 0, 0, 0],  # 0-4
    [5, 2, 3, 0, 0],  # 5-14
    [2, 2, 3, 3, 1],  # 15-34
    [0, 2, 1, 5, 1],  # 35-59
    [0, 3, 2, 0, 1],  # 60-79
    [0, 2, 1, 0, 1],  # 80+
])

# For compatibility with the simulator, turn weekly counts into average visits
# per hour by dividing by the number of hours in a week
hours_per_week = 24.0 * 7
mob_rate_per_age_per_type = visits_per_week / hours_per_week

Set time horizon and delta. Due to the data horizon considered for inference, we use 17 days. The setting for delta is explained in the paper.

In [16]:
# Time horizon: 17 days expressed in hours (limited by data availability)
max_time = 24.0 * 17
# delta, as set by distributions
delta = 4.6438

In [17]:
# Summary of the population and of the sites that go into the simulation.
# The per-type counts iterate over the ids in `site_dict` instead of a hard-coded
# range(5), so the summary stays correct when site types are added or removed.
site_type_arr = np.array(site_type)  # convert once instead of once per type
print('Population (by Age): ', population_per_age_group)
print('Sites (by type):     ',  [(site_type_arr == i).sum() for i in sorted(site_dict)])

print('Total:', sum(population_per_age_group), len(site_type))

Population (by Age):  [267, 365, 1342, 1494, 825, 234]
Sites (by type):      [41, 14, 21, 15, 3]
Total: 4527 94


Save arguments for the class object instantiation to be able to initiate `MobilitySimulator` on the fly during inference. That is more efficient than pickling in some cases.

In [18]:
# Constructor arguments for `MobilitySimulator`, gathered in one dictionary
kwargs = {
    'home_loc': home_loc,
    'people_age': people_age,
    'site_loc': site_loc,
    'site_type': site_type,
    'mob_rate_per_age_per_type': mob_rate_per_age_per_type,
    'dur_mean_per_type': dur_mean_per_type,
    'variety_per_type': variety_per_type,
    'delta': delta,
    'home_tile': home_tile,
    'tile_site_dist': tile_site_dist,
    'people_household': people_household,
}

# Persist the arguments; the file name encodes both downsampling factors
settings_file = f'example_settings_{downsample_population}_{downsample_sites}.pk'
with open(settings_file, 'wb') as fp:
    pickle.dump(kwargs, fp)

Create mobility traces as above, or comment in the last section below to specify fully artificial traces.

In [19]:
# Instantiate the simulator from the same arguments that were pickled above
mob = MobilitySimulator(**kwargs)
# Turn on the simulator's progress messages
mob.verbose = True

In [20]:
# Run the simulation over the full time horizon with a fixed seed; %time reports the run time
%time mob.simulate(max_time=max_time, seed=12345)
# Optional: uncomment to store the simulated mobility object on disk
# %time mob.to_pickle(f'tu_mobility_{downsample_population}_{downsample_sites}.pk')

Simulate mobility for 408.00 time units... Simulated 71646 visits.
Find contacts... 
Checking site 1/94Checking site 2/94Checking site 3/94Checking site 4/94Checking site 5/94Checking site 6/94Checking site 7/94Checking site 8/94Checking site 9/94Checking site 10/94Checking site 11/94Checking site 12/94Checking site 13/94Checking site 14/94Checking site 15/94Checking site 16/94Checking site 17/94Checking site 18/94Checking site 19/94Checking site 20/94Checking site 21/94Checking site 22/94Checking site 23/94Checking site 24/94Checking site 25/94Checking site 26/94Checking site 27/94Checking site 28/94Checking site 29/94Checking site 30/94Checking site 31/94Checking site 32/94Checking site 33/94Checking site 34/94Checking site 35/94Checking site 36/94Checking site 37/94Checking site 38/94Checking site 39/94Checking site 40/94Checking site 41/94Checking site 42/94Checking site 43/94Checking site 44/94Checking site 45/94Checking site 46/94Chec