In [1]:
import xml.etree.ElementTree as ET 
import gzip
from xml.etree.cElementTree import ElementTree

import pandas as pd
import geopandas as gpd
import timeit
import geopy.distance
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
from shapely.geometry import Point, MultiPoint, Polygon
import random
from pyproj import Proj, transform #Convertion EPSG2154 to WGS84

from datetime import datetime

import statistics
from collections import defaultdict 

import matsim

In [2]:
##Authors : Théo, Adel, Azise


##Author : Théo
def parse_attributes(node):
    values=[subnode.text for subnode in node.findall("attribute")]
    attributes=[subnode.attrib["name"] for subnode in node.findall("attribute")]
    person=dict()
    for ii in range(len(values)):
        person[attributes[ii]]=values[ii]
    if person!=[None]:
        return(person)
    
def parse_person(node):
    person = parse_attributes(node.find("attributes"))
    person["id"] = node.attrib["id"]
    return(person)

def parse_plan(node):
    children = list(node)
    plan={"selected":node.attrib["selected"]=="yes","activities":[{**{"activityType":child.tag}, **child.attrib} for child in children]}
    return plan

def parse_person_plan(node):
    # on récupère les plans sélectionnés
    plans = [parse_plan(plan) for plan in node.findall("plan") if plan.attrib["selected"]=="yes"]
    personId = node.attrib["id"]
    if len(plans)!=1:
        raise Exception("La personne d'id "+str(personId)+" a plusieurs plans sélectionnés !")
    else:
        plan={"personId":personId,"activities":plans[0]["activities"]}
        return(plan)

def parse_population(path):
    with gzip.open(initialPlanFile,'r') as fichier:
        tree = ET.ElementTree()
        tree.parse (fichier)
        population = tree.getroot()
        return [parse_person(node) for node in population.findall("person")]
    
def parse_plans_population(path):
    with gzip.open(initialPlanFile,'r') as fichier:
        tree = ET.ElementTree()
        tree.parse (fichier)
        population = tree.getroot()
        return [parse_person_plan(node) for node in population.findall("person")]

def select_first_coordinate_in_plan(person_plan):
    """A partir du plan d'une personne, remonte à ces premières coordonnées
    Renvoie un dict {personId,x,y}"""
    plan=person_plan["activities"]
    coordinates_found=False
    first_coordinates={"personId":person_plan["personId"]}
    for acti in plan:
        if "x" in acti:
            coordinates_found=True
            first_coordinates["x"]=acti["x"]
            first_coordinates["y"]=acti["y"]
            break
    if not coordinates_found:
        raise Exception("La personne "+str(person_plan["personId"])+" n'a pas de coordonnées définissables avec son plan d'activité.")
    else:
        return(first_coordinates)

def first_person_coordinates(path):
    with open(path, 'r') as fichier:
        tree = ET.ElementTree()
        tree.parse (fichier)
        population = tree.getroot()
        first_coordinates=[select_first_coordinate_in_plan(parse_person_plan(node)) for node in population.findall("person")]
        return(first_coordinates)

def population_stats(population):
    number=len(population)
    print("Taille population:   ",number)
    print("Titulaires du permis:",len([person for person in population if person["hasLicense"]=="yes"]))
    print("Femmes:              ",len([person for person in population if person["sex"]=="f"]))
    print("Hommes:              ",len([person for person in population if person["sex"]=="m"]))
    print("Revenu moyen:        ",statistics.mean([float(person["householdIncome"]) for person in population]))
    print("Abonnés aux pt:      ",len([person for person in population if person["hasPtSubscription"].lower()=="true"]))
    #return members
    #CSMembership(members,companies,CSMembership_file

    ##Author : Théo
def add_date(date1,date2):
    """date = string "hh:mm:ss" """
    hh1=int(date1[0:2])
    hh2=int(date2[0:2])
    mm1=int(date1[3:5])
    mm2=int(date2[3:5])
    ss1=int(date1[6:])
    ss2=int(date2[6:])
    rm=(ss1+ss2)//60
    ss3=str((ss1+ss2)%60)
    if len(ss3)==1:
        ss3="0"+ss3
    rh=(mm1+mm2+rm)//60
    mm3=str((mm1+mm2+rm)%60)
    if len(mm3)==1:
        mm3="0"+mm3
    hh3=str(rh+hh1+hh2)
    if len(hh3)==1:
        hh3="0"+hh3
    date3=hh3+":"+mm3+":"+ss3
    #print(date1,date2,date3)
    return(date3)

def dict_to_dataframe_2(list_dic):
    personId=[]
    precedingPurpose=[]
    followingPurpose=[]
    departureTime=[]
    arrivalTime=[]
    isFirst=[]
    isLast=[]
    mode=[]
    for personData in list_dic:
        for activity_index in range(len(personData['activities'])):
            activity=personData['activities'][activity_index]
            if activity["activityType"]=="leg":
                if personId and personId[-1]!=personData["personId"]:
                    isFirst.append(True)
                else:
                    isFirst.append(False)
                isLast.append(False)
                mode.append(activity["mode"])
                personId.append(personData["personId"])
                precedingPurpose.append(personData['activities'][activity_index-1]["type"])
                followingPurpose.append(personData['activities'][activity_index+1]["type"])
                departureTime.append(activity["dep_time"])
                if "start_time" in personData['activities'][activity_index+1]:
                    arrivalTime.append(personData['activities'][activity_index+1]["start_time"])
                else:
                    arrivalTime.append(add_date(activity["dep_time"],activity["trav_time"]))        
        isLast[-1]=True
    return(pd.DataFrame({"personId":personId,'departure_time':departureTime, 'arrival_time':arrivalTime, 'mode':mode, 'preceding_purpose':precedingPurpose,'following_purpose':followingPurpose,"is_first":isFirst,"is_last":isLast}))


##Author : Adel
def dict_to_dataframe_1(list_dic):
    lc=['personId', 'departure_time', 'arrival_time', 'mode', 'preceding_purpose', 'from_x', 'from_y', 
        'preceding_purpose_start_time', 'preceding_purpose_end_time', 'following_purpose', 'to_x', 'to_y', 
        'following_purpose_start_time', 'following_purpose_end_time', 'is_first', 'is_last']
    df = pd.DataFrame(columns=lc)
    
    for i in range(len(list_dic)):
        j=1#leg activity
        end=len(list_dic[i]['activities'])
        
        while(j<end-1):
            list_dic[i]['activities'][j]
            df = insert_into_dataframe(df,list_dic[i],j,0,0)#is_first=0 & is_last=0
            j+=2
        
    return df
        
               
def insert_into_dataframe(df,list_dic,j,is_first,is_last):
    end=len(list_dic['activities'])#Number of activities (act and leg)
    personId=list_dic['personId']##person id
    activities=list_dic['activities']##List of activities
    
    #departure_time
    #departure_time=datetime.strptime(activities[j]['dep_time'],'%H:%M:%S')
    departure_time=activities[j]['dep_time']
    
    #travel_time
    #travel_time=datetime.strptime(activities[j]['trav_time'],'%H:%M:%S')
    travel_time=activities[j]['trav_time']
    
    
    #arrival_time=departure_time+timedelta(hours=travel_time.time().hour,
     #                                            minutes=travel_time.time().minute,
     #                                            seconds=travel_time.time().second)
    arrival_time=departure_time+travel_time
    
    #O/D activities
    preceding_purpose=activities[j-1]['type']
    from_x=activities[j-1]['x']
    from_y=activities[j-1]['y']
    following_purpose=activities[j+1]['type']
    to_x=activities[j+1]['x']
    to_y=activities[j+1]['y']
    
    if ('end_time' in activities[j-1]) :
        preceding_purpose_end_time = activities[j-1]['end_time']
    else :
        preceding_purpose_end_time = np.nan
        
    if ('start_time' in activities[j-1]) :
        preceding_purpose_start_time = activities[j-1]['start_time']    
    else :
        preceding_purpose_start_time = np.nan
    
    if ('end_time' in activities[j+1]) :
        following_purpose_end_time = activities[j+1]['end_time']
    else :
        following_purpose_end_time = np.nan
        
    if ('start_time' in activities[j+1]) :
        following_purpose_start_time = activities[j+1]['start_time']
    else :
        following_purpose_start_time = np.nan
    
    #mode
    mode=activities[j]['mode']
    
    if j==1:
        is_first=1
    if j==end-2:
        is_last=1
    
    df = df.append({'personId': personId ,
                    'departure_time': departure_time,
                    'arrival_time' : arrival_time,
                    'mode': mode,
                    'preceding_purpose' : preceding_purpose,
                    'from_x' :  from_x, 
                    'from_y' : from_y,
                    'preceding_purpose_start_time' : preceding_purpose_start_time, 
                    'preceding_purpose_end_time' : preceding_purpose_end_time,
                    'following_purpose' : following_purpose,
                    'to_x' :  to_x, 
                    'to_y' : to_y,
                    'following_purpose_start_time' : following_purpose_start_time, 
                    'following_purpose_end_time' : following_purpose_end_time,
                    'is_first' : is_first,
                    'is_last' : is_last}, ignore_index=True)
    return df


def dict_to_dataframe_3(list_dic):
    personId=[]
    precedingPurpose=[]
    from_x = []
    from_y = []
    preceding_purpose_end_time = []
    preceding_purpose_start_time = []
    followingPurpose=[]
    to_x = []
    to_y = []
    following_purpose_start_time = []
    following_purpose_end_time = []
    departureTime=[]
    arrivalTime=[]
    isFirst=[]
    isLast=[]
    mode=[]
    for personData in list_dic:
        j=1
        for activity_index in range(len(personData['activities'])):
            end=len(personData['activities'])            
            
            activity=personData['activities'][activity_index]
            if activity["activityType"]=="leg":
                prec_act = personData['activities'][activity_index-1]
                foll_act = personData['activities'][activity_index+1]
                
                #if personId and personId[-1]!=personData["personId"]:
                #    isFirst.append(True)
                #else:
                 #   isFirst.append(False)
                #isLast.append(False)
                
                if j==1:
                    isFirst.append(True)
                    isLast.append(False)
                elif j==end-2:
                    isLast.append(True)
                    isFirst.append(False)
                else :
                    isFirst.append(False)  
                    isLast.append(False) 
                
                mode.append(activity["mode"])
                personId.append(personData["personId"])
                
                precedingPurpose.append(prec_act["type"])
                from_x.append(prec_act['x'])
                from_y.append(prec_act['y'])
                
                followingPurpose.append(foll_act["type"])
                to_x.append(foll_act['x'])
                to_y.append(foll_act['y'])
                
                if ('end_time' in prec_act) :
                    preceding_purpose_end_time.append(prec_act['end_time'])
                else :
                    preceding_purpose_end_time.append(np.nan)

                if ('start_time' in prec_act) :
                    preceding_purpose_start_time.append(prec_act['start_time'] )   
                else :
                    preceding_purpose_start_time.append(np.nan)

                if ('end_time' in foll_act) :
                    following_purpose_end_time.append(foll_act['end_time'])
                else :
                    following_purpose_end_time.append(np.nan)

                if ('start_time' in foll_act) :
                    following_purpose_start_time.append(foll_act['start_time'])
                else :
                    following_purpose_start_time.append(np.nan)
                
                departureTime.append(activity["dep_time"])
                if "start_time" in personData['activities'][activity_index+1]:
                    arrivalTime.append(personData['activities'][activity_index+1]["start_time"])
                else:
                    arrivalTime.append(add_date(activity["dep_time"],activity["trav_time"]))
                    
                j+=2
    """
    print(len(personId))
    print(len(departureTime))
    print(len(arrivalTime))
    print(len(mode))
    print(len(precedingPurpose))
    print(len(from_x))
    print(len(from_y))
    print(len(preceding_purpose_start_time))
    print(len(preceding_purpose_end_time))
    print(len(followingPurpose))
    print(len(to_x))
    print(len(to_y))
    print(len(following_purpose_start_time))
    print(len(following_purpose_end_time))
    print(len(isFirst))
    print(len(isLast))
    """
    return(pd.DataFrame({"person_id":personId,'departure_time':departureTime, 'arrival_time':arrivalTime, 
                         'mode':mode, 'preceding_purpose':precedingPurpose, "from_x":from_x, "from_y":from_y,
                         "preceding_purpose_start_time":preceding_purpose_start_time, 
                         "preceding_purpose_end_time":preceding_purpose_end_time, 'following_purpose':followingPurpose,
                         "to_x":to_x, "to_y":to_y, "following_purpose_start_time":following_purpose_start_time,
                         "following_purpose_end_time" : following_purpose_end_time, "is_first":isFirst,"is_last":isLast}))

## Loading data

In [3]:
output_path = "/media/aodiallo/Data/MATSIM/Simulation/robotaxi/output/scenario1_c3" 

### Input plan

In [7]:
initialPlanFile = '/media/aodiallo/Data/MATSIM/Simulation/robotaxi/input/scenario1/population_sce1.xml.gz'
#treePopulation = ET.parse(gzip.open(initialPlanFile,'r'))
#population  = treePopulation.getroot()

### Persons

In [8]:
#return a list of dict of person' attribuate
persons = parse_population(initialPlanFile)

In [9]:
##Example
persons[0]

{'age': '32',
 'bikeAvailability': 'none',
 'carAvailability': 'none',
 'censusHouseholdId': '183012',
 'censusPersonId': '377986',
 'employed': 'True',
 'hasLicense': 'yes',
 'hasPtSubscription': 'false',
 'householdId': '474430',
 'householdIncome': '4476.391583168445',
 'htsHouseholdId': '8484',
 'htsPersonId': '18514',
 'isPassenger': 'false',
 'sex': 'm',
 'subpopulation': 'private',
 'id': '1000008'}

In [10]:
df_persons_input = pd.concat([pd.DataFrame(dic, index = [0]) for dic in persons])

In [11]:
df_persons_input

Unnamed: 0,age,bikeAvailability,carAvailability,censusHouseholdId,censusPersonId,employed,hasLicense,hasPtSubscription,householdId,householdIncome,htsHouseholdId,htsPersonId,isPassenger,sex,subpopulation,id
0,32,none,none,183012,377986,True,yes,false,474430,4476.391583168445,8484,18514,false,m,private,1000008
0,32,all,none,183012,377986,True,yes,false,474430,4476.391583168445,1537,3356,false,m,private,1000009
0,32,some,none,183012,377986,True,yes,false,474430,4476.391583168445,4923,10846,true,m,private,1000010
0,33,none,none,18480,38075,False,yes,true,47644,2083.709401444271,8999,19603,false,m,private,100002
0,33,all,none,18480,38075,False,yes,false,47644,2083.709401444271,1830,3991,false,m,private,100003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,66,none,none,182997,377959,True,yes,false,474396,3022.192916676685,680,1540,false,f,private,999947
0,25,none,none,183008,377980,False,yes,false,474420,4924.560292323243,535,1230,false,m,private,999992
0,25,none,none,183008,377980,False,no,true,474420,4924.560292323243,7535,16468,false,m,private,999993
0,23,none,none,183008,377981,False,yes,false,474420,4924.560292323243,3375,7270,false,f,private,999994


In [12]:
df_persons_input = df_persons_input.reset_index()

In [13]:
df_persons_input = df_persons_input.drop(["index"], axis=1)
df_persons_input = df_persons_input.rename(columns={"id": "person_id"})

In [14]:
df_persons_input.to_csv("%s/df_persons_input_perons.csv" % output_path)

#### Number of persons per motorized households

In [15]:
df_private_persons_input = df_persons_input[df_persons_input["subpopulation"]=="private"]

In [17]:
df_count_persons = df_private_persons_input[["householdId"]].groupby("householdId").size().reset_index(name = "number_of_persons")

In [18]:
df_count_persons.describe()

Unnamed: 0,number_of_persons
count,20510.0
mean,2.353584
std,1.358536
min,1.0
25%,1.0
50%,2.0
75%,3.0
max,13.0


In [28]:
df_count_persons["number_of_persons"].value_counts(normalize=True) * 100

2     32.857143
1     32.096538
3     14.236958
4     12.657240
5      5.602145
6      1.911263
7      0.458313
8      0.131643
9      0.039005
11     0.004876
13     0.004876
Name: number_of_persons, dtype: float64

### Plans

In [19]:
## return a list of dict of person' plan
plans = parse_plans_population(initialPlanFile)

In [20]:
###Example
plans[0]

{'personId': '1000008',
 'activities': [{'activityType': 'activity',
   'type': 'home',
   'link': '8102',
   'facility': 'home_474430',
   'x': '844783.995555',
   'y': '6519931.594388',
   'end_time': '07:03:47'},
  {'activityType': 'leg',
   'mode': 'private_AV',
   'dep_time': '07:03:47',
   'trav_time': '00:03:59'},
  {'activityType': 'activity',
   'type': 'other',
   'link': '50655',
   'facility': 'sec_27677',
   'x': '842728.1',
   'y': '6519167.3',
   'start_time': '07:13:47',
   'end_time': '07:13:47'},
  {'activityType': 'leg',
   'mode': 'private_AV',
   'dep_time': '07:13:47',
   'trav_time': '00:09:34'},
  {'activityType': 'activity',
   'type': 'work',
   'link': '51370',
   'facility': 'work_263614',
   'x': '841768.16',
   'y': '6511898.9',
   'start_time': '07:58:47',
   'end_time': '12:03:47'},
  {'activityType': 'leg',
   'mode': 'walk',
   'dep_time': '12:03:47',
   'trav_time': '00:10:16'},
  {'activityType': 'activity',
   'type': 'leisure',
   'link': '32552',


In [21]:
## Start time
t1 = datetime.now()

In [22]:
df_trips_input = dict_to_dataframe_3(plans)

In [23]:
## End time
t2 = datetime.now()
print(str(t2-t1))

0:00:09.256703


In [24]:
df_trips_input

Unnamed: 0,person_id,departure_time,arrival_time,mode,preceding_purpose,from_x,from_y,preceding_purpose_start_time,preceding_purpose_end_time,following_purpose,to_x,to_y,following_purpose_start_time,following_purpose_end_time,is_first,is_last
0,1000008,07:03:47,07:13:47,private_AV,home,844783.995555,6519931.594388,,07:03:47,other,842728.1,6519167.3,07:13:47,07:13:47,True,False
1,1000008,07:13:47,07:58:47,private_AV,other,842728.1,6519167.3,07:13:47,07:13:47,work,841768.16,6511898.9,07:58:47,12:03:47,False,False
2,1000008,12:03:47,12:13:47,walk,work,841768.16,6511898.9,07:58:47,12:03:47,leisure,841455.79,6511422.98,12:13:47,12:33:47,False,False
3,1000008,12:33:47,12:43:47,walk,leisure,841455.79,6511422.98,12:13:47,12:33:47,work,841768.16,6511898.9,12:43:47,17:33:47,False,False
4,1000008,17:33:47,18:18:47,private_AV,work,841768.16,6511898.9,12:43:47,17:33:47,home,844783.995555,6519931.594388,18:18:47,19:28:47,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354570,999994,07:05:31,07:15:31,private_AV,other,847968.6,6518541.3,07:05:31,07:05:31,education,847319.7,6519014.3,07:15:31,16:15:31,False,False
354571,999994,16:15:31,16:45:31,private_AV,education,847319.7,6519014.3,07:15:31,16:15:31,home,854492.55,6521135.91,16:45:31,,False,True
354572,999995,07:13:37,07:48:37,private_AV,home,854492.55,6521135.91,,07:13:37,education,857259.4,6521244.1,07:48:37,11:28:37,True,False
354573,999995,11:28:37,12:13:37,private_AV,education,857259.4,6521244.1,07:48:37,11:28:37,home,854492.55,6521135.91,12:13:37,18:28:37,False,False


In [25]:
df_trips_input.to_csv("%s/df_trips_input_plans.csv" % output_path)

## Simulation Output plan

In [19]:
outputPlanFile = '%s/sce1.tst1.output_plans.xml.gz' %output_path
#treePopulation = ET.parse(gzip.open(outputPlanFile,'r'))
#population  = treePopulation.getroot()

### Persons

In [20]:
#return a list of dict of person' attribuate
persons = parse_population(outputPlanFile)

In [21]:
df_persons_output = pd.concat([pd.DataFrame(dic, index = [0]) for dic in persons])

In [22]:
df_persons_output

Unnamed: 0,age,bikeAvailability,carAvailability,censusHouseholdId,censusPersonId,employed,hasLicense,hasPtSubscription,householdId,householdIncome,htsHouseholdId,htsPersonId,isPassenger,sex,subpopulation,id
0,32,none,none,183012,377986,True,yes,false,474430,4476.391583168445,8484,18514,false,m,private,1000008
0,32,all,none,183012,377986,True,yes,false,474430,4476.391583168445,1537,3356,false,m,private,1000009
0,32,some,none,183012,377986,True,yes,false,474430,4476.391583168445,4923,10846,true,m,private,1000010
0,33,none,none,18480,38075,False,yes,true,47644,2083.709401444271,8999,19603,false,m,private,100002
0,33,all,none,18480,38075,False,yes,false,47644,2083.709401444271,1830,3991,false,m,private,100003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,66,none,none,182997,377959,True,yes,false,474396,3022.192916676685,680,1540,false,f,private,999947
0,25,none,none,183008,377980,False,yes,false,474420,4924.560292323243,535,1230,false,m,private,999992
0,25,none,none,183008,377980,False,no,true,474420,4924.560292323243,7535,16468,false,m,private,999993
0,23,none,none,183008,377981,False,yes,false,474420,4924.560292323243,3375,7270,false,f,private,999994


In [23]:
df_persons_output = df_persons_output.reset_index()

In [24]:
df_persons_output = df_persons_output.drop(["index"], axis=1)
df_persons_output = df_persons_output.rename(columns={"id": "person_id"})

In [25]:
df_persons_output.to_csv("%s/df_persons_output.csv" % output_path)

### Plans

In [26]:
## return a list of dict of person' plan
plans = parse_plans_population(outputPlanFile)

In [27]:
## Start time
t1 = datetime.now()

In [28]:
df_trips_output = dict_to_dataframe_3(plans)

In [29]:
## End time
t2 = datetime.now()
print(str(t2-t1))

0:00:02.020414


In [30]:
df_trips_output

Unnamed: 0,person_id,departure_time,arrival_time,mode,preceding_purpose,from_x,from_y,preceding_purpose_start_time,preceding_purpose_end_time,following_purpose,to_x,to_y,following_purpose_start_time,following_purpose_end_time,is_first,is_last
0,1000008,07:03:47,07:13:47,private_AV,home,844783.995555,6519931.594388,,07:03:47,other,842728.1,6519167.3,07:13:47,07:13:47,True,False
1,1000008,07:13:47,07:58:47,private_AV,other,842728.1,6519167.3,07:13:47,07:13:47,work,841768.16,6511898.9,07:58:47,12:03:47,False,False
2,1000008,12:03:47,12:13:47,walk,work,841768.16,6511898.9,07:58:47,12:03:47,leisure,841455.79,6511422.98,12:13:47,12:33:47,False,False
3,1000008,12:33:47,12:43:47,walk,leisure,841455.79,6511422.98,12:13:47,12:33:47,work,841768.16,6511898.9,12:43:47,17:33:47,False,False
4,1000008,17:33:47,18:18:47,private_AV,work,841768.16,6511898.9,12:43:47,17:33:47,home,844783.995555,6519931.594388,18:18:47,19:28:47,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354570,999994,07:05:31,07:15:31,private_AV,other,847968.6,6518541.3,07:05:31,07:05:31,education,847319.7,6519014.3,07:15:31,16:15:31,False,False
354571,999994,16:15:31,16:45:31,private_AV,education,847319.7,6519014.3,07:15:31,16:15:31,home,854492.55,6521135.91,16:45:31,,False,True
354572,999995,07:13:37,07:48:37,private_AV,home,854492.55,6521135.91,,07:13:37,education,857259.4,6521244.1,07:48:37,11:28:37,True,False
354573,999995,11:28:37,12:13:37,private_AV,education,857259.4,6521244.1,07:48:37,11:28:37,home,854492.55,6521135.91,12:13:37,18:28:37,False,False


In [31]:
df_trips_output.to_csv("%s/df_trips_simulation_output_plans.csv" % output_path)