In [46]:
import xml.etree.ElementTree as ET
import pandas as pd

# Load the plans XML file and flatten it into one row per <activity> element.
tree = ET.parse('Scenarios/cutoutWorlds/po-1_pn-1.0_sn-1/plans.xml')
root = tree.getroot()


def _optional_float(raw):
    """Return ``float(raw)``, or ``None`` when the XML attribute is absent.

    ``None`` becomes NaN/None in the DataFrame and is then replaced by the
    -1 sentinel by the ``fillna(-1)`` call below, instead of crashing with
    ``TypeError`` inside ``float(None)``.
    """
    return None if raw is None else float(raw)


def _split_activity_type(activity_type):
    """Split ``'<name>_<number>'`` into ``(name, float(number))``.

    Only the LAST underscore is treated as the separator, so a name that
    itself contains underscores no longer breaks the two-value unpacking.
    A type without any underscore is returned unchanged with value 0.0.

    Raises:
        ValueError: if the part after the last underscore is not numeric.
    """
    if '_' in activity_type:
        main, _, value = activity_type.rpartition('_')
        return main, float(value)
    return activity_type, 0.0


def _activity_record(person_id, activity):
    """Build the flat dict (one future DataFrame row) for one <activity>."""
    type_main, type_value = _split_activity_type(activity.get('type'))

    # end_time is kept as a pandas Timedelta, max_dur is stored in seconds;
    # in both cases -1 marks an attribute that is missing or empty.
    raw_end_time = activity.get('end_time')
    end_time = pd.to_timedelta(raw_end_time) if raw_end_time else -1
    raw_max_dur = activity.get('max_dur')
    max_dur = pd.to_timedelta(raw_max_dur).total_seconds() if raw_max_dur else -1

    record = {
        'person_id': person_id,
        'activity_type_main': type_main,
        'activity_type_value': type_value,
        'link': activity.get('link'),
        'x': _optional_float(activity.get('x')),
        'y': _optional_float(activity.get('y')),
        'max_dur': max_dur,
        'end_time': end_time,
    }
    # Every nested <attribute name="..."> becomes an extra column; its text
    # is stored as-is (string), no numeric conversion is attempted.
    for attr in activity.findall('.//attribute'):
        record[attr.get('name')] = attr.text
    return record


# NOTE(review): './/activity' matches activities at any depth below the
# person, so if a person carries several plans, all of them contribute rows
# -- confirm that this is intended.
activities_data = [
    _activity_record(person.get('id'), activity)
    for person in root.findall('person')
    for activity in person.findall('.//activity')
]

# Build the table, replace every missing value with the -1 sentinel, and
# persist it for later use.
df_activities = pd.DataFrame(activities_data)
df_activities = df_activities.fillna(-1)
df_activities.to_pickle("./df_activities.pkl")

In [48]:

# Show the parsed activities table (one row per activity).
df_activities

Unnamed: 0,person_id,activity_type_main,activity_type_value,link,x,y,max_dur,end_time,zoneId,cemdapStopDuration_s
0,100012001,home,58800.0,12466,4.628514e+06,5.831327e+06,-1.0,0 days 07:55:00,12064472,-1.0
1,100012001,other,600.0,106959,4.629522e+06,5.830838e+06,540.0,-1,12064472,540.0
2,100012001,home,33000.0,12466,4.628514e+06,5.831327e+06,-1.0,0 days 17:24:00,12064472,33060.0
3,100012001,ride interaction,0.0,12466,4.628134e+06,5.831123e+06,0.0,-1,-1,-1.0
4,100012001,ride interaction,0.0,78465,4.625223e+06,5.837994e+06,0.0,-1,-1,-1.0
...,...,...,...,...,...,...,...,...,...,...
326809,freight_996_87464-berlin,freight,0.0,130117,4.591246e+06,5.832407e+06,-1.0,-1,-1,-1.0
326810,freight_9_144245-berlin,freight,0.0,12739,4.574147e+06,5.809461e+06,-1.0,0 days 08:44:10,-1,-1.0
326811,freight_9_144245-berlin,freight interaction,0.0,12739,4.574147e+06,5.809461e+06,0.0,-1,-1,-1.0
326812,freight_9_144245-berlin,freight interaction,0.0,144486,4.599983e+06,5.833178e+06,0.0,-1,-1,-1.0


In [12]:
 # Load and parse the XML file
tree = ET.parse('Scenarios/cutoutWorlds/po-1_pn-1.0_sn-1/network.xml')
root = tree.getroot()

# Node table: the id and numeric coordinates of every <node> under <nodes>.
nodes_data = [
    {
        'node_id': node.get('id'),
        'x': float(node.get('x')),
        'y': float(node.get('y')),
    }
    for nodes in root.findall('nodes')
    for node in nodes.findall('.//node')
]
df_nodes = pd.DataFrame(nodes_data)

# Link table: ids stay strings, the four physical properties are parsed as
# floats, and each nested <attribute> adds a column holding its raw text.
links_data = []
for links in root.findall('links'):
    for link in links.findall('.//link'):
        link_data = {
            'link_id': link.get('id'),
            'from': link.get('from'),
            'to': link.get('to'),
        }
        link_data.update(
            (field, float(link.get(field)))
            for field in ('length', 'freespeed', 'capacity', 'permlanes')
        )
        link_data.update(
            (attr.get('name'), attr.text)
            for attr in link.findall('.//attribute')
        )
        links_data.append(link_data)
df_links = pd.DataFrame(links_data)

# Look up the coordinates of both end nodes of every link: first the 'from'
# node (start_node_*), then the 'to' node (end_node_*).
for endpoint, renamed in (
    ('from', {'x': 'start_node_x', 'y': 'start_node_y'}),
    ('to', {'x': 'end_node_x', 'y': 'end_node_y'}),
):
    df_links = (
        df_links
        .merge(df_nodes, how='left', left_on=endpoint, right_on='node_id')
        .rename(columns=renamed)
        .drop('node_id', axis=1)
    )

df_links.to_pickle("./df_links_network.pkl")

In [13]:
# Show the link table, including the start/end node coordinates merged in.
df_links

Unnamed: 0,link_id,from,to,length,freespeed,capacity,permlanes,origid,type,start_node_x,start_node_y,end_node_x,end_node_y
0,1,4864982246,262215600,31.467991,4.166667,1200.0,1.0,34079256,residential,4.598660e+06,5.824485e+06,4.598687e+06,5.824468e+06
1,10,26703746,2951553394,97.996533,6.944444,3200.0,2.0,254807835,secondary,4.593483e+06,5.822597e+06,4.593571e+06,5.822640e+06
2,100,27318315,587724650,71.196902,2.777778,600.0,1.0,10616925,living_street,4.577283e+06,5.806557e+06,4.577298e+06,5.806627e+06
3,1000,29690627,29690626,17.031152,4.166667,1200.0,1.0,13763400,residential,4.590346e+06,5.826569e+06,4.590355e+06,5.826554e+06
4,10000,1106607970,36265803,27.555616,6.944444,1800.0,1.5,5382362,tertiary,4.642227e+06,5.803035e+06,4.642204e+06,5.803051e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...
202934,pt_9995,pt_500000065901,pt_500000087401,354.868966,12.236861,100000.0,1.0,,,4.625974e+06,5.909627e+06,4.626313e+06,5.909522e+06
202935,pt_9996,pt_500000087401,pt_500000086501,1353.018128,9.080659,100000.0,1.0,,,4.626313e+06,5.909522e+06,4.627578e+06,5.909042e+06
202936,pt_9997,pt_500000086501,pt_500000086601,1689.217448,8.082380,100000.0,1.0,,,4.627578e+06,5.909042e+06,4.628532e+06,5.907648e+06
202937,pt_9998,pt_500000086601,pt_500000083701,1600.265408,7.656772,100000.0,1.0,,,4.628532e+06,5.907648e+06,4.628974e+06,5.906110e+06


In [None]:
# Attach to each activity the network link it refers to (left join, so
# activities without a matching link are kept), then discard the link's
# length/freespeed/capacity/permlanes columns.
new = (
    df_activities
    .merge(df_links, how='left', left_on='link', right_on='link_id')
    .drop(columns=['length', 'freespeed', 'capacity', 'permlanes'])
)
new

In [None]:
import json
# Read one JSON sample of the dataset and split it into tidy tables.
file = 'Data/cutoutWorlds/Train/po-1_pn-1.0_sn-1/s-0.json'
with open(file, 'r') as f:
    data = json.load(f)

# Work / home tables: coordinates plus the matching go_to_* values.
df_work = pd.DataFrame({key: data[key] for key in ('work_x', 'work_y', 'go_to_work')})
df_home = pd.DataFrame({key: data[key] for key in ('home_x', 'home_y', 'go_to_home')})

# Link and node tables; note the JSON keys are 'links_id' / 'nodes_id' /
# 'nodes_x' / 'nodes_y' while the columns use singular names.
df_links_dataset = pd.DataFrame({
    'link_id': data['links_id'],
    'link_from': data['link_from'],
    'link_to': data['link_to'],
    'link_length': data['link_length'],
})
df_nodes_dataset = pd.DataFrame({
    'node_id': data['nodes_id'],
    'node_x': data['nodes_x'],
    'node_y': data['nodes_y'],
})

# Add the coordinates of each link's start node (link_from) and end node
# (link_to) by joining against the node table twice.
for endpoint, renamed in (
    ('link_from', {'node_x': 'start_node_x', 'node_y': 'start_node_y'}),
    ('link_to', {'node_x': 'end_node_x', 'node_y': 'end_node_y'}),
):
    df_links_dataset = (
        df_links_dataset
        .merge(df_nodes_dataset, how='left', left_on=endpoint, right_on='node_id')
        .rename(columns=renamed)
        .drop('node_id', axis=1)
    )

In [None]:
# Per-link totals of the dataset's go_to_work / go_to_home values.
# Activities are matched to the dataset rows by exact coordinate equality.
# NOTE(review): joining on float x/y only matches when both sources hold
# identical floating-point values -- confirm the coordinates are not rounded
# differently in the two sources.

# Work activities -> go_to_work summed per link.
df_act_work = df_activities[df_activities['activity_type_main'] == 'work']
df_act_work = df_act_work.merge(df_work, how='left', left_on=['x', 'y'], right_on=['work_x', 'work_y'])
df_act_work = df_act_work.drop(['x', 'y'], axis=1)
# Select the column BEFORE aggregating: summing the whole frame would also
# "sum" the string/object columns (person_id, end_time, ...), which is wasted
# work and can raise on pandas >= 2.0 where numeric_only defaults to False.
df_act_work_agg = df_act_work.groupby(by="link")['go_to_work'].sum().reset_index(drop=False)

# Home activities -> go_to_home summed per link.
df_act_home = df_activities[df_activities['activity_type_main'] == 'home']
df_act_home = df_act_home.merge(df_home, how='left', left_on=['x', 'y'], right_on=['home_x', 'home_y'])
df_act_home = df_act_home.drop(['x', 'y'], axis=1)
df_act_home_agg = df_act_home.groupby(by="link")['go_to_home'].sum().reset_index(drop=False)

# Outer join keeps links that appear in only one of the two aggregates; the
# missing side counts as 0 so the total below is always defined.
df_act_agg = df_act_home_agg.merge(df_act_work_agg, how='outer', on='link')
df_act_agg = df_act_agg.fillna(0)
df_act_agg['go_to_sum'] = df_act_agg['go_to_home'] + df_act_agg['go_to_work']

In [None]:
# Pair every dataset link with the network link(s) whose start and end node
# coordinates are identical, then keep only the link/node ids of both sides
# under explicit *_dataset / *_network names.
mg = (
    df_links_dataset
    .merge(
        df_links,
        how='left',
        on=['start_node_x', 'start_node_y', 'end_node_x', 'end_node_y'],
    )
    [['link_id_x', 'link_from', 'link_to', 'link_id_y', 'from', 'to']]
    .rename(columns={
        'link_id_x': 'link_id_dataset',
        'link_id_y': 'link_id_network',
        'link_from': 'node_from_dataset',
        'from': 'node_from_network',
        'link_to': 'node_to_dataset',
        'to': 'node_to_network',
    })
)

In [None]:
# Attach the per-link go_to_* totals to the dataset<->network link mapping.
link_home_work = mg.merge(df_act_agg, how='left', left_on='link_id_network', right_on='link')
# Links without any matched activity get a total of 0. Assign the result back
# instead of calling fillna(inplace=True) on the column selection: that
# chained in-place form warns on pandas 2.x and leaves the frame unchanged
# once Copy-on-Write is active.
link_home_work['go_to_sum'] = link_home_work['go_to_sum'].fillna(0)
# Final lookup table: dataset link id -> summed go_to value.
link_go_to = link_home_work[['link_id_dataset', 'go_to_sum']]