In [6]:
import xml.etree.ElementTree as ElementTree


class XmlListConfig(list):
    """List built from the children of an XML element.

    Each child of ``aList`` contributes at most one entry:

    * a child that has sub-elements becomes an ``XmlDictConfig`` when it has a
      single sub-element or when its first two sub-elements carry different
      tags, and a nested ``XmlListConfig`` when its first two sub-elements
      share the same tag;
    * a child without sub-elements contributes its stripped text and is
      skipped when that text is missing or blank.

    Parameters
    ----------
    aList : iterable of xml.etree.ElementTree.Element
        Typically a parent element, whose iteration yields its children.
    """

    def __init__(self, aList):
        super().__init__()
        for element in aList:
            # Count sub-elements explicitly: truth-testing an Element is
            # deprecated in ElementTree, and len() states the intent directly.
            if len(element):
                if len(element) == 1 or element[0].tag != element[1].tag:
                    # Heterogeneous (or single) children: treat like a dict.
                    self.append(XmlDictConfig(element))
                else:
                    # First two children share a tag: treat like a list.
                    self.append(XmlListConfig(element))
            elif element.text:
                text = element.text.strip()
                if text:
                    self.append(text)


class XmlDictConfig(dict):
    """Dictionary view of an XML element.

    The element's own attributes become top-level entries. Every child is
    then stored under its tag:

    * a child with sub-elements is converted recursively into a nested
      ``XmlDictConfig``;
    * a child without sub-elements but with attributes is stored as a plain
      dict of those attributes (its text is not kept);
    * any other child is stored as its text (``None`` when it has none).

    When the same key occurs more than once, its values are gathered into a
    list (see ``updateShim``).
    """

    def __init__(self, parent_element):
        own_attributes = parent_element.items()  # (name, value) attribute pairs
        if own_attributes:
            self.updateShim(dict(own_attributes))

        for child in parent_element:
            if len(child) > 0:
                entry = XmlDictConfig(child)
            else:
                child_attributes = child.items()
                entry = dict(child_attributes) if child_attributes else child.text
            self.updateShim({child.tag: entry})

    def updateShim(self, aDict):
        """Merge ``aDict`` into this mapping, collecting repeated keys in lists.

        A key that is not present yet is stored as-is. A key that already
        exists is removed and re-inserted (so it moves to the end of the
        mapping) holding a list: the existing list extended with the new
        value, or a new two-item list of the old and new values.
        """
        for key, incoming in aDict.items():
            if key not in self:
                self[key] = incoming
                continue

            existing = self.pop(key)
            if type(existing) is list:
                existing.append(incoming)
                self[key] = existing
            else:
                self[key] = [existing, incoming]

def strtotime(time):
    """Convert a colon-separated ``H:M:S`` string into a number of seconds.

    The fields are paired left to right with the weights 3600, 60 and 1;
    pairing stops at the shorter of the two, so extra fields are ignored and
    a two-field string is read as hours and minutes.
    """
    seconds_per_field = (3600, 60, 1)
    total = 0
    for weight, field in zip(seconds_per_field, time.split(':')):
        total += weight * int(field)
    return total

"""
/*
 * File              : congestion Pricing analysis_allinpython.py
 * Author            : Mina Lee(ml6543)
 * Date              : 08.08.2019
 */
"""



In [None]:
# Load the plans XML file. `root` is the raw element tree consumed by the
# extraction loops in the following cell; `xmldict` is a nested-dict view of
# the same document.
import pandas as pd

# The path is relative to the notebook's working directory.
tree = ElementTree.parse('./raw/BUILT.15.experienced_plans_area.xml')
root = tree.getroot()
# Convert the whole document into nested dicts (repeated tags become lists).
xmldict = XmlDictConfig(root)



In [7]:

# Build one row per activity and one row per leg for every person, then join
# the two tables side by side.
#
# The row pairing assumes that activities and legs alternate within a plan
# (activity, leg, activity, ...): activity k is paired with the k-th leg, and
# the final activity is paired with an 'N/A' placeholder row. A person without
# any leg gets a single 'na' placeholder row instead.

HOURS_PER_DAY = 24

# Activity-related columns (one entry per activity).
id_data_act = []
activity_data = []
start_time = []
end_time = []

# Leg-related columns (one entry per leg, plus one placeholder per person).
id_data_mode = []
mode_data = []
# NOTE(review): route_data only ever receives the per-person placeholders and
# is not used in the frames below; kept for backward compatibility.
route_data = []
trav_time = []
dist_data = []
dep_list = []

for person in root.findall('person'):
    person_id = person.get('id')

    for act in person.findall('plan/activity'):
        id_data_act.append(person_id)
        activity_data.append(act.get('type'))  # e.g. Home, Work
        start_time.append(act.get('start_time'))
        end_time.append(act.get('end_time'))

    # Look the legs up once per person instead of once per leg.
    legs = person.findall('plan/leg')
    for leg in legs:
        id_data_mode.append(person_id)
        mode_data.append(leg.get('mode'))
        trav_time.append(leg.get('trav_time'))

        # A leg without a <route> child yields a missing distance instead of
        # raising AttributeError.
        route = leg.find('route')
        dist_data.append(route.get('distance') if route is not None else None)

        # Keep only the hour of the departure time and wrap it onto 0-23.
        # Times can run past midnight (24:xx, 25:xx, ...); the modulo maps
        # 24 -> 0 and also handles values of 48 and above.
        dep_hr = leg.get('dep_time')
        if dep_hr:
            dep_list.append(int(dep_hr.split(':')[0]) % HOURS_PER_DAY)
        else:
            dep_list.append(None)

    # Placeholder row: 'N/A' closes a chain of legs (the last activity has no
    # outgoing leg); 'na' marks a person who has no legs at all.
    placeholder = 'N/A' if legs else 'na'
    id_data_mode.append(person_id)
    for column in (mode_data, route_data, dep_list, trav_time, dist_data):
        column.append(placeholder)

# The concat below pairs rows purely by position, so the two id sequences must
# match exactly; fail loudly instead of silently shifting legs onto the wrong
# person or activity.
assert id_data_act == id_data_mode, (
    "activity rows and leg rows are misaligned: every person is expected to "
    "have exactly one more activity than legs"
)

actList = list(zip(id_data_act, activity_data, start_time, end_time))
modeList = list(zip(mode_data, dep_list, trav_time, dist_data))

df_act = pd.DataFrame(actList, columns=['id', 'activity', 'start_time', 'end_time'])
df_mode = pd.DataFrame(modeList, columns=['mode', 'dep_time', 'trav_time', 'distance'])
df_merged = pd.concat([df_act, df_mode], axis=1)
df_merged

Unnamed: 0,id,activity,start_time,end_time,mode,dep_time,trav_time,distance
0,0,Home,,08:35:00,taxi,8,00:14:36,6430.193693165087
1,0,Work,08:49:36,08:50:36,walk,8,00:12:33,2059.859322002017
2,0,Work,09:03:09,12:13:09,walk,12,00:18:24,3019.179402349809
3,0,Work,12:31:33,12:37:33,walk,12,00:20:58,3439.2718061384708
4,0,Work,12:58:31,17:58:31,bike,17,01:20:42,66182.72772909897
5,0,Home,19:19:13,,,,,
6,1,Home,,08:05:00,ride,8,01:15:29,61898.16857711576
7,1,Work,09:20:29,16:00:29,ride,16,01:15:29,61898.16857711576
8,1,Work,17:15:58,19:08:58,taxi,19,01:09:30,5245.680656643391
9,1,Work,20:18:28,21:13:28,walk,21,00:21:51,3583.912445756235


In [9]:
# Read the lookup table that is later joined onto the activity/leg rows by
# `id` to provide the `man_nonman` flag.
man_id = pd.read_csv('entire_man_nonman_subpopulation_id.csv')
man_id.head()

Unnamed: 0,id,man_nonman
0,0,1
1,1,1
2,2,1
3,3,1
4,4,1


In [10]:
# Attach the man_nonman flag to every activity/leg row (left join on id) and
# keep a second frame restricted to the rows flagged 1.
df_merged['id'] = df_merged['id'].astype(int)  # cast so the join key matches man_id
df_merged2 = (
    df_merged
    .merge(man_id, on='id', how='left')
    .assign(man_nonman=lambda frame: pd.to_numeric(frame['man_nonman']))
)
is_man = df_merged2['man_nonman'].eq(1)
df_merged2_manOnly = df_merged2.loc[is_man]
df_merged2_manOnly

Unnamed: 0,id,activity,start_time,end_time,mode,dep_time,trav_time,distance,man_nonman
0,0,Home,,08:35:00,taxi,8,00:14:36,6430.193693165087,1.0
1,0,Work,08:49:36,08:50:36,walk,8,00:12:33,2059.859322002017,1.0
2,0,Work,09:03:09,12:13:09,walk,12,00:18:24,3019.179402349809,1.0
3,0,Work,12:31:33,12:37:33,walk,12,00:20:58,3439.2718061384708,1.0
4,0,Work,12:58:31,17:58:31,bike,17,01:20:42,66182.72772909897,1.0
5,0,Home,19:19:13,,,,,,1.0
6,1,Home,,08:05:00,ride,8,01:15:29,61898.16857711576,1.0
7,1,Work,09:20:29,16:00:29,ride,16,01:15:29,61898.16857711576,1.0
8,1,Work,17:15:58,19:08:58,taxi,19,01:09:30,5245.680656643391,1.0
9,1,Work,20:18:28,21:13:28,walk,21,00:21:51,3583.912445756235,1.0


# mode_share



In [11]:
# All agents: count of non-null ids per travel mode.
df_agent_mode = df_merged2.groupby('mode')[['id']].count()
df_agent_mode

Unnamed: 0_level_0,id
mode,Unnamed: 1_level_1
FHV,58802
,368533
access_walk,194300
bike,47908
car,307070
cb,30417
egress_walk,146659
na,5820
pt,420043
ride,157060


In [16]:
# Rows flagged man_nonman == 1 only: count of non-null ids per travel mode.
df_manOnly_mode = df_merged2_manOnly.groupby('mode')[['id']].count()
df_manOnly_mode

Unnamed: 0_level_0,id
mode,Unnamed: 1_level_1
FHV,11010
,41216
access_walk,23875
bike,7857
car,17732
cb,5240
egress_walk,18135
na,27
pt,46161
ride,10310


# departure_time

In [17]:
# All agents: non-null activity and id counts per departure hour.
df_agent_deptTime = df_merged2.groupby('dep_time')[['activity', 'id']].count()
df_agent_deptTime

Unnamed: 0_level_0,activity,id
dep_time,Unnamed: 1_level_1,Unnamed: 2_level_1
1,12711,12711
2,10547,10547
3,8512,8512
4,8289,8289
5,21729,21729
6,71163,71163
7,176656,176656
8,191701,191701
9,126919,126919
10,107588,107588


In [18]:
# Rows flagged man_nonman == 1 only: non-null activity and id counts per
# departure hour.
df_manOnly_deptTime = df_merged2_manOnly.groupby('dep_time')[['activity', 'id']].count()
df_manOnly_deptTime




Unnamed: 0_level_0,activity,id
dep_time,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1399,1399
2,1101,1101
3,807,807
4,481,481
5,1311,1311
6,3808,3808
7,16040,16040
8,24685,24685
9,18231,18231
10,15333,15333
