# Installation


In [0]:
# Fetch the scikit-mobility sources and install the package from that checkout
!git clone https://github.com/scikit-mobility/scikit-mobility.git
!cd scikit-mobility && python3 setup.py install
# NOTE(review): scikit-mobility is installed a second time through pip, on top of
# the source install above — confirm that both installs are really needed
!pip3 install scikit-mobility
# Fetch the GeoJSON files that later cells read from the morocco.geojson/ directory
!git clone https://github.com/IhabBendidi/morocco.geojson.git

Be sure to restart the runtime afterward (on Google Colab) so that the installation actually takes effect.

In [0]:
import geopandas as gpd  
import pandas as pd
import skmob
from skmob.models.epr import DensityEPR
from skmob.models.epr import Ditras
from skmob.models.markov_diary_generator import MarkovDiaryGenerator
from skmob.preprocessing import filtering, compression, detection, clustering

Setting up the start and end times of the simulation: from 1/1/2019 to 15/1/2019.

In [0]:
# Helper used to keep agent ids unique when separately generated groups are merged
def change_uid(x, l):
  """Return the string form of ``x`` with the suffix ``l`` appended.

  ``x`` is converted with ``str``; ``l`` must already be a string.
  """
  return "".join((str(x), l))

In [0]:
# Simulation window: starts 2019-01-01 08:00 and ends 2019-01-15 08:00
start_time, end_time = (
    pd.to_datetime(stamp)
    for stamp in ('2019/01/01 08:00:00', '2019/01/15 08:00:00')
)

### Irfane simulation

In [0]:
# Load the Irfane tessellation (Rabat) from the cloned morocco.geojson repository
irfane_tessellation = gpd.read_file('morocco.geojson/irfane.geojson')
depr = DensityEPR()
# Generate trajectories for 120 agents over the simulation window, using the
# tessellation's 'population' column as relevance; the next two steps post-process
# the result to make the synthetic data look as close to real data as possible
density_tdf = depr.generate(start_time, end_time, irfane_tessellation, relevance_column='population', n_agents=120)
# Filter the trajectories with a 5 km/h maximum-speed threshold
density_tdf = filtering.filter(density_tdf, max_speed_kmh=5.)
# Run stop detection (50 minutes for a stop, 0.3 km spatial radius); density_tdf is rebound to its result
density_tdf = detection.stops(density_tdf, stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)

In [0]:
# Suffix every uid with "a" so these agents cannot collide with other generated groups
density_tdf["uid"] = density_tdf["uid"].map(lambda uid: change_uid(uid, "a"))

We will now generate the agents' habits for Al Irfane using the Markov diary generator.

In [0]:
# Download the GeoLife sample that serves as training data for the MarkovDiaryGenerator
url = 'https://raw.githubusercontent.com/scikit-mobility/scikit-mobility/master/tutorial/data/geolife_sample.txt.gz'
df = pd.read_csv(url, sep=',', compression='gzip')
# Wrap the raw table as a TrajDataFrame, mapping its lat / lon / user / datetime columns
tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', user_id='user', datetime='datetime')
# Compression step is left disabled:
#ctdf = compression.compress(tdf)
# Detect stops (50 minutes for a stop, 0.3 km spatial radius), then cluster them
stdf = detection.stops(tdf,stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)
markov_tdf = clustering.cluster(stdf)

In [0]:
# Instantiate the MarkovDiaryGenerator and fit it on the clustered stops (lid='cluster')
# NOTE(review): the positional 2 is presumably the number of individuals to fit on —
# confirm against the scikit-mobility documentation
mdg = MarkovDiaryGenerator()
mdg.fit(markov_tdf, 2, lid='cluster')

In [0]:
# Build a Ditras model around the fitted diary generator
ditras = Ditras(mdg)
# Generate trajectories for 150 agents on the Irfane tessellation over the
# simulation window, using 'population' as the relevance column
ditras_tdf = ditras.generate(start_time, end_time, irfane_tessellation, relevance_column='population',
                    n_agents=150, od_matrix=None, show_progress=True)

In [0]:
# Suffix every uid with "b" so these agents cannot collide with other generated groups
ditras_tdf["uid"] = ditras_tdf["uid"].map(lambda uid: change_uid(uid, "b"))

In [0]:
# Stack the Ditras and DensityEPR agents for Irfane into one frame with a fresh index
irfan_tdf = pd.concat([ditras_tdf,density_tdf],ignore_index=True)

In [0]:
# Save the combined Irfane trajectories to a CSV file in the working directory
irfan_tdf.to_csv("irfan_mobility.csv")

### Agdal Simulation

In [0]:
# Load the Agdal tessellation (Rabat) from the cloned morocco.geojson repository
agdal_tessellation = gpd.read_file('morocco.geojson/agdal.geojson')
depr = DensityEPR()
# Generate trajectories for 120 agents over the simulation window, using the
# tessellation's 'population' column as relevance; the next two steps post-process
# the result to make the synthetic data look as close to real data as possible
density_tdf = depr.generate(start_time, end_time, agdal_tessellation, relevance_column='population', n_agents=120)
# Filter the trajectories with a 5 km/h maximum-speed threshold
density_tdf = filtering.filter(density_tdf, max_speed_kmh=5.)
# Run stop detection (50 minutes for a stop, 0.3 km spatial radius); density_tdf is rebound to its result
density_tdf = detection.stops(density_tdf, stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)

In [0]:
# Suffix every uid with "c" so these agents cannot collide with other generated groups
density_tdf["uid"] = density_tdf["uid"].map(lambda uid: change_uid(uid, "c"))

In [0]:
# Download the GeoLife sample that serves as training data for the MarkovDiaryGenerator
# NOTE(review): this repeats the download and fit done in the Irfane section with the
# same URL and the same arguments — confirm whether retraining here is intended
url = 'https://raw.githubusercontent.com/scikit-mobility/scikit-mobility/master/tutorial/data/geolife_sample.txt.gz'
df = pd.read_csv(url, sep=',', compression='gzip')
# Wrap the raw table as a TrajDataFrame, mapping its lat / lon / user / datetime columns
tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', user_id='user', datetime='datetime')
# Compression step is left disabled:
#ctdf = compression.compress(tdf)
# Detect stops (50 minutes for a stop, 0.3 km spatial radius), then cluster them
stdf = detection.stops(tdf,stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)
markov_tdf = clustering.cluster(stdf)
# Instantiate the MarkovDiaryGenerator and fit it on the clustered stops (lid='cluster')
mdg = MarkovDiaryGenerator()
mdg.fit(markov_tdf, 2, lid='cluster')

In [0]:
# Build a Ditras model around the fitted diary generator
ditras = Ditras(mdg)
# Generate trajectories for 150 agents on the Agdal tessellation over the
# simulation window, using 'population' as the relevance column
ditras_tdf = ditras.generate(start_time, end_time, agdal_tessellation, relevance_column='population',
                    n_agents=150, od_matrix=None, show_progress=True)

In [0]:
# Suffix every uid with "d" so these agents cannot collide with other generated groups
ditras_tdf["uid"] = ditras_tdf["uid"].map(lambda uid: change_uid(uid, "d"))

In [0]:
# Stack the Ditras and DensityEPR agents for Agdal into one frame with a fresh index
agdal_tdf = pd.concat([ditras_tdf,density_tdf],ignore_index=True)

In [0]:
# Save the combined Agdal trajectories to a CSV file in the working directory
agdal_tdf.to_csv("agdal_mobility.csv")

### Aggregate results

In [0]:
# Load the combined Agdal + Irfane tessellation (Rabat) from the cloned morocco.geojson repository
dual_tessellation = gpd.read_file('morocco.geojson/agdal_irfane.geojson')
depr = DensityEPR()
# Generate trajectories for 120 agents over the simulation window, using the
# tessellation's 'population' column as relevance; the next two steps post-process
# the result to make the synthetic data look as close to real data as possible
density_tdf = depr.generate(start_time, end_time, dual_tessellation, relevance_column='population', n_agents=120)
# Filter the trajectories with a 5 km/h maximum-speed threshold
density_tdf = filtering.filter(density_tdf, max_speed_kmh=5.)
# Run stop detection (50 minutes for a stop, 0.3 km spatial radius); density_tdf is rebound to its result
density_tdf = detection.stops(density_tdf, stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)

In [0]:
# Suffix every uid with "e" so these agents cannot collide with other generated groups
density_tdf["uid"] = density_tdf["uid"].map(lambda uid: change_uid(uid, "e"))

In [0]:
# Download the GeoLife sample that serves as training data for the MarkovDiaryGenerator
# NOTE(review): this repeats the download and fit done in the earlier sections with the
# same URL and the same arguments — confirm whether retraining here is intended
url = 'https://raw.githubusercontent.com/scikit-mobility/scikit-mobility/master/tutorial/data/geolife_sample.txt.gz'
df = pd.read_csv(url, sep=',', compression='gzip')
# Wrap the raw table as a TrajDataFrame, mapping its lat / lon / user / datetime columns
tdf = skmob.TrajDataFrame(df, latitude='lat', longitude='lon', user_id='user', datetime='datetime')
# Compression step is left disabled:
#ctdf = compression.compress(tdf)
# Detect stops (50 minutes for a stop, 0.3 km spatial radius), then cluster them
stdf = detection.stops(tdf,stop_radius_factor=0.5, minutes_for_a_stop=50.0, spatial_radius_km=0.3, leaving_time=False)
markov_tdf = clustering.cluster(stdf)
# Instantiate the MarkovDiaryGenerator and fit it on the clustered stops (lid='cluster')
mdg = MarkovDiaryGenerator()
mdg.fit(markov_tdf, 2, lid='cluster')

In [0]:
# Build a Ditras model around the fitted diary generator
ditras = Ditras(mdg)
# Generate trajectories for 150 agents on the combined Agdal + Irfane tessellation
# over the simulation window, using 'population' as the relevance column
ditras_tdf = ditras.generate(start_time, end_time, dual_tessellation, relevance_column='population',
                    n_agents=150, od_matrix=None, show_progress=True)

In [0]:
# Suffix every uid with "f" so these agents cannot collide with other generated groups
ditras_tdf["uid"] = ditras_tdf["uid"].map(lambda uid: change_uid(uid, "f"))

In [0]:
# Stack the Ditras and DensityEPR agents for the combined tessellation into one frame with a fresh index
dual_tdf = pd.concat([ditras_tdf,density_tdf],ignore_index=True)

In [0]:
# Merge the Agdal, Irfane and combined-tessellation populations into a single frame with a fresh index
simulation_tdf = pd.concat([agdal_tdf,irfan_tdf,dual_tdf],ignore_index=True)

In [0]:
# Save the merged simulation to a CSV file in the working directory; it is read back in the cleaning section
simulation_tdf.to_csv('simulation_data.csv')

# Data cleaning and preparation

In [0]:
import pandas as pd
# Reload the merged simulation output from 'simulation_data.csv' as a plain pandas DataFrame
simulation_tdf = pd.read_csv('simulation_data.csv')

In [0]:
import json
from shapely.geometry import shape, Point

In [0]:
# Parse the GeoJSON file describing the sectors into the module-level dict `js`
with open('morocco.geojson/agdal_irfane.geojson', 'r') as f:
  js = json.loads(f.read())

In [0]:
def get_zone_id(x):
  """Tag one row with the id of the sector polygon that contains its position.

  The point built from ``x["lng"]`` and ``x["lat"]`` is tested against every
  feature of the module-level GeoJSON dict ``js``. ``x['zone_id']`` is set to
  the ``id`` property of the last feature whose polygon contains the point,
  or to "" when no feature contains it. The row is modified in place and
  returned.
  """
  location = Point(x["lng"], x["lat"])
  # Every feature is checked (no early exit), so a later match overrides an earlier one
  containing_ids = [
    feature['properties']['id']
    for feature in js['features']
    if shape(feature['geometry']).contains(location)
  ]
  x['zone_id'] = containing_ids[-1] if containing_ids else ""
  return x

In [0]:
# Tag every row with the id of the zone that contains it
simulation_tdf = simulation_tdf.apply(get_zone_id, axis=1)

# Keep only the rows that landed in a known zone, restricted to the columns of interest
simulation_tdf = simulation_tdf.loc[
    simulation_tdf['zone_id'] != "",
    ["uid", "datetime", "lat", "lng", "zone_id"],
]

In [76]:
# Display the cleaned, zone-tagged frame
simulation_tdf

Unnamed: 0,uid,datetime,lat,lng,zone_id
0,1d,2019-01-01 08:00:00.000000,33.992823,-6.857832,lot 3
1,1d,2019-01-01 11:00:00.000000,34.000416,-6.857033,Gare
2,1d,2019-01-01 17:00:00.000000,34.000032,-6.854877,ENIM
3,1d,2019-01-01 18:00:00.000000,33.992823,-6.857832,lot 3
4,1d,2019-01-02 12:00:00.000000,34.000416,-6.857033,Gare
5,1d,2019-01-03 09:00:00.000000,33.992823,-6.857832,lot 3
6,1d,2019-01-03 18:00:00.000000,34.000032,-6.854877,ENIM
7,1d,2019-01-03 23:00:00.000000,33.992823,-6.857832,lot 3
8,1d,2019-01-04 15:00:00.000000,34.000032,-6.854877,ENIM
9,1d,2019-01-04 18:00:00.000000,33.992823,-6.857832,lot 3
