In [5]:
import xml.etree.ElementTree as ET
import pandas as pd
# For each scenario index, flatten every <activity> of every <person> in
# plans.xml into one row and pickle the resulting DataFrame.
for i in range(15, 20):
    # Load and parse the plans file of scenario i
    tree = ET.parse(f'Scenarios/sparseWorlds/po-1_s-{i}/plans.xml')
    root = tree.getroot()

    # One dict per activity, collected across all persons of this scenario
    activities_data = []

    # Only <person> elements that are direct children of the root are visited
    for person in root.findall('person'):
        person_id = person.get('id')

        # './/activity' matches activities at any depth below the person
        for activity in person.findall('.//activity'):
            activity_type = activity.get('type')
            link = activity.get('link')

            # A missing or empty end_time / max_dur is stored as the sentinel -1.
            # NOTE(review): end_time is kept as a Timedelta while max_dur is
            # converted to seconds, so the two columns use different units/types.
            if activity.get('end_time'):
                end_time = pd.to_timedelta(activity.get('end_time'))
            else:
                end_time = -1
            if activity.get('max_dur'):
                max_dur = pd.to_timedelta(activity.get('max_dur')).total_seconds()
            else:
                max_dur = -1

            # Split the activity type into a name and a trailing value.
            # Splitting on the LAST underscore only keeps types whose name itself
            # contains an underscore from raising "too many values to unpack".
            if '_' in activity_type:
                activity_type_main, activity_type_value = activity_type.rsplit('_', 1)
            else:
                activity_type_main, activity_type_value = activity_type, 0

            # Base record for this activity (every activity type is recorded)
            activity_data = {
                'person_id': person_id,
                'activity_type_main': activity_type_main,
                'activity_type_value': float(activity_type_value),
                'link': link,
                'x': float(activity.get('x')),
                'y': float(activity.get('y')),
                'max_dur': max_dur,
                'end_time': end_time
            }
            # Nested <attribute name="..."> elements become extra columns; an
            # attribute whose name equals a base key overwrites that value.
            for attr in activity.findall('.//attribute'):
                name = attr.get('name')
                value = attr.text
                activity_data[name] = value
            activities_data.append(activity_data)

    # Build the frame once from the list of records; attributes that are absent
    # on some activities show up as NaN and are replaced by the sentinel -1.
    df_activities = pd.DataFrame(activities_data)
    df_activities = df_activities.fillna(-1)
    df_activities.to_pickle(f"Data/sparseWorlds/Test/po-1/df_activities_{i}.pkl")

In [6]:

# Display the activities frame left in the kernel by the parsing loop; the loop
# reassigns df_activities on every iteration, so only the last scenario it
# processed (i = 19 for range(15, 20)) is shown here.
df_activities

Unnamed: 0,person_id,activity_type_main,activity_type_value,link,x,y,max_dur,end_time,zoneId,cemdapStopDuration_s
0,178594401,home,66600.0,35879,4.598631e+06,5.816782e+06,-1.0,0 days 06:59:59,908010404,-1
1,178594401,pt interaction,0.0,pt_3508,4.598802e+06,5.816806e+06,0.0,-1,-1,-1
2,178594401,pt interaction,0.0,pt_39009,4.596292e+06,5.818477e+06,0.0,-1,-1,-1
3,178594401,other,1800.0,130510,4.596137e+06,5.818365e+06,2650.0,-1,902020206,1560
4,178594401,shopping,7800.0,130510,4.596353e+06,5.819063e+06,-1.0,0 days 10:10:59,902010104,7980
...,...,...,...,...,...,...,...,...,...,...
799,449413501,home,12000.0,116208,4.590953e+06,5.826040e+06,-1.0,0 days 09:58:00,901044102,11760
800,449413501,other,1200.0,46801,4.592102e+06,5.825805e+06,900.0,-1,901044102,900
801,449413501,home,4800.0,116208,4.590953e+06,5.826040e+06,4800.0,-1,901044102,4800
802,449413501,shopping,600.0,46801,4.592212e+06,5.825586e+06,60.0,-1,901044102,60


In [11]:
# For each scenario index, turn network.xml into a link table enriched with the
# coordinates of each link's start and end node, then pickle it.
for i in range(0, 10):
    # Parse the network definition of scenario i
    tree = ET.parse(f'Scenarios/sparseWorlds/po-1_s-{i}/network.xml')
    root = tree.getroot()

    # Node table: one record (id + float coordinates) per <node> found under
    # each top-level <nodes> element
    nodes_data = [
        {
            'node_id': node_elem.get('id'),
            'x': float(node_elem.get('x')),
            'y': float(node_elem.get('y')),
        }
        for nodes_elem in root.findall('nodes')
        for node_elem in nodes_elem.findall('.//node')
    ]
    df_nodes = pd.DataFrame(nodes_data)

    # Link table: one record per <link> found under each top-level <links> element
    links_data = []
    for links_elem in root.findall('links'):
        for link_elem in links_elem.findall('.//link'):
            record = {
                'link_id': link_elem.get('id'),
                'from': link_elem.get('from'),   # node ids are kept as strings
                'to': link_elem.get('to'),
                'length': float(link_elem.get('length')),
                'freespeed': float(link_elem.get('freespeed')),
                'capacity': float(link_elem.get('capacity')),
                'permlanes': float(link_elem.get('permlanes')),
            }
            # Nested <attribute name="..."> elements become extra columns
            for attribute_elem in link_elem.findall('.//attribute'):
                record[attribute_elem.get('name')] = attribute_elem.text
            links_data.append(record)

    # Attach node coordinates twice: first for the 'from' node, then for the
    # 'to' node. The helper key 'node_id' is dropped after each join.
    df_links = (
        pd.DataFrame(links_data)
        .merge(df_nodes, how='left', left_on='from', right_on='node_id')
        .rename(columns={'x': 'start_node_x', 'y': 'start_node_y'})
        .drop(columns='node_id')
        .merge(df_nodes, how='left', left_on='to', right_on='node_id')
        .rename(columns={'x': 'end_node_x', 'y': 'end_node_y'})
        .drop(columns='node_id')
    )
    df_links.to_pickle(f"Data/sparseWorlds/Train/po-1/df_links_network_{i}.pkl")

In [12]:
# Display the link table left in the kernel by the network loop; df_links is
# reassigned on every iteration, so only the last scenario it processed
# (i = 9 for range(0, 10)) is shown here.
df_links

Unnamed: 0,link_id,from,to,length,freespeed,capacity,permlanes,origid,type,start_node_x,start_node_y,end_node_x,end_node_y
0,100023,29270483,2164561176,155.658594,6.944444,1600.0,2.0,281095764,secondary,4.597143e+06,5.821851e+06,4.597153e+06,5.822006e+06
1,10006,26762962,26704067,18.353501,6.944444,4800.0,3.0,24912365,secondary,4.590583e+06,5.820063e+06,4.590568e+06,5.820073e+06
2,100123,2851028086,2851028085,14.320611,6.944444,3200.0,2.0,281095721,secondary_link,4.589787e+06,5.814297e+06,4.589774e+06,5.814290e+06
3,100135,29270482,29270522,198.247968,6.944444,1600.0,2.0,281095736,secondary,4.597129e+06,5.821861e+06,4.597078e+06,5.821669e+06
4,100136,29270522,2884462964,9.702576,6.944444,3200.0,2.0,281095736,secondary,4.597078e+06,5.821669e+06,4.597074e+06,5.821660e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45839,pt_9995,pt_500000065901,pt_500000087401,354.868966,12.236861,100000.0,1.0,,,4.625974e+06,5.909627e+06,4.626313e+06,5.909522e+06
45840,pt_9996,pt_500000087401,pt_500000086501,1353.018128,9.080659,100000.0,1.0,,,4.626313e+06,5.909522e+06,4.627578e+06,5.909042e+06
45841,pt_9997,pt_500000086501,pt_500000086601,1689.217448,8.082380,100000.0,1.0,,,4.627578e+06,5.909042e+06,4.628532e+06,5.907648e+06
45842,pt_9998,pt_500000086601,pt_500000083701,1600.265408,7.656772,100000.0,1.0,,,4.628532e+06,5.907648e+06,4.628974e+06,5.906110e+06


In [None]:
# Attach link information to each activity via its link id, then discard the
# link columns that are not needed here.
# NOTE(review): df_activities and df_links are whatever the last iterations of
# the two parsing loops left behind (scenario 19 and scenario 9 if the cells ran
# in order) -- confirm these two frames are meant to be joined.
new = (
    df_activities
    .merge(df_links, how='left', left_on='link', right_on='link_id')
    .drop(columns=['length', 'freespeed', 'capacity', 'permlanes'])
)
new

In [None]:
import json
# Load one cut-out world dataset and rebuild its work, home, link and node tables.
file = 'Data/cutoutWorlds/Train/po-1_pn-1.0_sn-1/s-0.json'
with open(file, 'r') as f:
    data = json.load(f)

# Work / home locations with their "go_to_*" values, taken column-for-column
# from the JSON payload
df_work = pd.DataFrame({key: data[key] for key in ('work_x', 'work_y', 'go_to_work')})
df_home = pd.DataFrame({key: data[key] for key in ('home_x', 'home_y', 'go_to_home')})

# Output column name -> key in the JSON payload
link_fields = {
    'link_id': 'links_id',
    'link_from': 'link_from',
    'link_to': 'link_to',
    'link_length': 'link_length',
}
node_fields = {
    'node_id': 'nodes_id',
    'node_x': 'nodes_x',
    'node_y': 'nodes_y',
}
df_links_dataset = pd.DataFrame({column: data[key] for column, key in link_fields.items()})
df_nodes_dataset = pd.DataFrame({column: data[key] for column, key in node_fields.items()})

# Attach node coordinates twice: first for the link's start node, then for its
# end node. The helper key 'node_id' is dropped after each join.
df_links_dataset = (
    df_links_dataset
    .merge(df_nodes_dataset, how='left', left_on='link_from', right_on='node_id')
    .rename(columns={'node_x': 'start_node_x', 'node_y': 'start_node_y'})
    .drop(columns='node_id')
    .merge(df_nodes_dataset, how='left', left_on='link_to', right_on='node_id')
    .rename(columns={'node_x': 'end_node_x', 'node_y': 'end_node_y'})
    .drop(columns='node_id')
)

In [None]:
# Sum the dataset's go_to_work / go_to_home values per network link, using the
# activity coordinates to look the values up.
# NOTE(review): the joins below match on exact float equality of (x, y); any
# rounding difference between the two sources yields NaN, which the sums then
# treat as 0 -- confirm the coordinates are bit-identical.

# Work activities joined to the work locations on their coordinates
df_act_work = df_activities[df_activities['activity_type_main'] == 'work']
df_act_work = df_act_work.merge(df_work, how='left', left_on=['x', 'y'], right_on=['work_x', 'work_y'])
df_act_work = df_act_work.drop(columns=['x', 'y'])
# Select the column BEFORE summing: summing every column first would also try
# to add up the string / mixed-type columns (person_id, end_time, ...), which
# newer pandas no longer drops silently and may reject with a TypeError.
df_act_work_agg = df_act_work.groupby('link')['go_to_work'].sum().reset_index()

# Home activities joined to the home locations on their coordinates
df_act_home = df_activities[df_activities['activity_type_main'] == 'home']
df_act_home = df_act_home.merge(df_home, how='left', left_on=['x', 'y'], right_on=['home_x', 'home_y'])
df_act_home = df_act_home.drop(columns=['x', 'y'])
df_act_home_agg = df_act_home.groupby('link')['go_to_home'].sum().reset_index()

# The outer join keeps links that appear in only one of the two aggregates; the
# missing side is filled with 0 so the total below is always defined.
df_act_agg = df_act_home_agg.merge(df_act_work_agg, how='outer', on='link')
df_act_agg = df_act_agg.fillna(0)
df_act_agg['go_to_sum'] = df_act_agg['go_to_home'] + df_act_agg['go_to_work']

In [None]:
# Map each dataset link to a network link by matching the coordinates of both
# end nodes. 'link_id' exists in both frames, so the merge suffixes it with
# _x (dataset side) and _y (network side) before the rename below.
mg_column_names = {
    'link_id_x': 'link_id_dataset',
    'link_id_y': 'link_id_network',
    'link_from': 'node_from_dataset',
    'from': 'node_from_network',
    'link_to': 'node_to_dataset',
    'to': 'node_to_network',
}
mg = (
    df_links_dataset
    .merge(df_links, how='left', on=['start_node_x', 'start_node_y', 'end_node_x', 'end_node_y'])
    [['link_id_x', 'link_from', 'link_to', 'link_id_y', 'from', 'to']]
    .rename(columns=mg_column_names)
)

In [None]:
# Attach the per-link go_to totals to the dataset/network link mapping.
link_home_work = mg.merge(df_act_agg, how='left', left_on='link_id_network', right_on='link')
# Links with no aggregated activities get a total of 0. The result is assigned
# back to the column: calling fillna(..., inplace=True) on a column selection
# acts on a temporary object under pandas Copy-on-Write and leaves the NaNs in
# link_home_work.
link_home_work['go_to_sum'] = link_home_work['go_to_sum'].fillna(0)
# Final table: dataset link id -> summed go_to value
link_go_to = link_home_work[['link_id_dataset', 'go_to_sum']]