In [79]:
import xml.etree.ElementTree as ET
import pandas as pd

# Parse the plans XML and flatten every <activity> element into one record.
tree = ET.parse('Scenarios/cutoutWorlds/po-1_pn-1.0_sn-1/plans.xml')
root = tree.getroot()

# One dict per successfully parsed activity; converted to a DataFrame below.
activities_data = []

for person in root.findall('person'):
    person_id = person.get('id')

    # './/activity' matches activity elements at any depth below the person.
    for activity in person.findall('.//activity'):
        activity_type = activity.get('type')
        link = activity.get('link')

        # The type is expected to look like "<main>_<value>". Split on the LAST
        # underscore only, so a main part that itself contains underscores no
        # longer breaks the two-target unpacking. A missing 'type' attribute or
        # a type without underscore keeps the whole type and a value of 0.
        # NOTE(review): assumes the numeric part is the final segment - confirm.
        if activity_type is not None and '_' in activity_type:
            activity_type_main, activity_type_value = activity_type.rsplit('_', 1)
        else:
            activity_type_main, activity_type_value = activity_type, 0

        # Build the record for this activity (every type, not only "other").
        try:
            activity_data = {
                'person_id': person_id,
                'activity_type_main': activity_type_main,
                'activity_type_value': float(activity_type_value),
                'link': link,
                'x': float(activity.get('x')),
                'y': float(activity.get('y')),
            }
        except (TypeError, ValueError):
            # float() raises TypeError for a missing attribute (None) and
            # ValueError for non-numeric text. Report and skip the activity
            # rather than appending an empty dict, which would only add an
            # all-NaN row to the DataFrame.
            print("Error parsing")
            print(activity_type_value)
            continue

        activities_data.append(activity_data)

# Convert the collected records into a DataFrame and persist it.
df_activities = pd.DataFrame(activities_data)
df_activities.to_pickle("./df_activities.pkl")

In [80]:
# Read the network XML. Note that `tree` and `root` are rebound here and no
# longer refer to the plans file parsed earlier.
tree = ET.parse('Scenarios/cutoutWorlds/po-1_pn-1.0_sn-1/network.xml')
root = tree.getroot()

# One record per <node> found anywhere below each top-level <nodes> element.
nodes_data = [
    {
        'node_id': node.get('id'),
        'x': float(node.get('x')),
        'y': float(node.get('y')),
    }
    for nodes in root.findall('nodes')
    for node in nodes.findall('.//node')
]
df_nodes = pd.DataFrame(nodes_data)

# One record per <link> found anywhere below each top-level <links> element.
# 'from' and 'to' stay strings (node ids); the other attributes become floats.
links_data = [
    {
        'link_id': link.get('id'),
        'from': link.get('from'),
        'to': link.get('to'),
        'length': float(link.get('length')),
        'freespeed': float(link.get('freespeed')),
        'capacity': float(link.get('capacity')),
        'permlanes': float(link.get('permlanes')),
    }
    for links in root.findall('links')
    for link in links.findall('.//link')
]

# Look up the coordinates of each link's start node ('from') and end node
# ('to'), dropping the helper 'node_id' column after each lookup.
df_links = (
    pd.DataFrame(links_data)
    .merge(df_nodes, how='left', left_on='from', right_on='node_id')
    .rename(columns={'x': 'start_node_x', 'y': 'start_node_y'})
    .drop(columns='node_id')
    .merge(df_nodes, how='left', left_on='to', right_on='node_id')
    .rename(columns={'x': 'end_node_x', 'y': 'end_node_y'})
    .drop(columns='node_id')
)
df_links.to_pickle("./df_links_network.pkl")

In [None]:
# Attach to every activity the network row of the link it refers to, keeping
# the link's node ids and node coordinates but not its physical attributes.
new = (
    df_activities
    .merge(df_links, how='left', left_on='link', right_on='link_id')
    .drop(columns=['length', 'freespeed', 'capacity', 'permlanes'])
)
new

In [None]:
import json
# Load one sample of the training dataset.
file = 'Data/cutoutWorlds/Train/po-1_pn-1.0_sn-1/s-0.json'
with open(file, 'r') as f:
    data = json.load(f)

# Work and home tables: the JSON keys are used unchanged as column names.
df_work = pd.DataFrame({key: data[key] for key in ('work_x', 'work_y', 'go_to_work')})
df_home = pd.DataFrame({key: data[key] for key in ('home_x', 'home_y', 'go_to_home')})

# Node table: JSON key -> column name differs ('nodes_*' -> 'node_*').
df_nodes_dataset = pd.DataFrame({
    column: data[key]
    for column, key in (
        ('node_id', 'nodes_id'),
        ('node_x', 'nodes_x'),
        ('node_y', 'nodes_y'),
    )
})

# Link table, extended with the coordinates of the start node ('link_from')
# and of the end node ('link_to'); the helper 'node_id' column is dropped
# after each lookup.
df_links_dataset = (
    pd.DataFrame({
        column: data[key]
        for column, key in (
            ('link_id', 'links_id'),
            ('link_from', 'link_from'),
            ('link_to', 'link_to'),
            ('link_length', 'link_length'),
        )
    })
    .merge(df_nodes_dataset, how='left', left_on='link_from', right_on='node_id')
    .rename(columns={'node_x': 'start_node_x', 'node_y': 'start_node_y'})
    .drop(columns='node_id')
    .merge(df_nodes_dataset, how='left', left_on='link_to', right_on='node_id')
    .rename(columns={'node_x': 'end_node_x', 'node_y': 'end_node_y'})
    .drop(columns='node_id')
)

In [None]:
def _sum_go_to_by_link(activities, locations, kind):
    """Match activities of one type to location rows and total a value per link.

    Parameters
    ----------
    activities : pandas.DataFrame
        Needs the columns 'activity_type_main', 'x', 'y' and 'link'.
    locations : pandas.DataFrame
        Needs the columns '<kind>_x', '<kind>_y' and 'go_to_<kind>'.
    kind : str
        Activity type to keep, e.g. 'work' or 'home'.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The matched activity rows (without 'x'/'y'), and a frame with the
        columns 'link' and 'go_to_<kind>' holding the per-link sum.
    """
    value_col = f'go_to_{kind}'
    # NOTE(review): the join needs exactly equal x/y values, and every activity
    # at a location receives that location's full go_to_* value before the
    # per-link sum - confirm both are intended.
    matched = (
        activities[activities['activity_type_main'] == kind]
        .merge(locations, how='left',
               left_on=['x', 'y'], right_on=[f'{kind}_x', f'{kind}_y'])
        .drop(columns=['x', 'y'])
    )
    # Select the value column BEFORE summing: summing the whole frame would
    # also "sum" the string columns, which is wasted work and can raise on
    # object columns that contain non-summable values.
    per_link = matched.groupby(by="link")[value_col].sum().reset_index(drop=False)
    return matched, per_link


df_act_work, df_act_work_agg = _sum_go_to_by_link(df_activities, df_work, 'work')
df_act_home, df_act_home_agg = _sum_go_to_by_link(df_activities, df_home, 'home')

# Outer join keeps links that occur for only one activity type; the missing
# side is treated as 0 so the two totals can be added.
df_act_agg = df_act_home_agg.merge(df_act_work_agg, how='outer', on='link').fillna(0)
df_act_agg['go_to_sum'] = df_act_agg['go_to_home'] + df_act_agg['go_to_work']

In [None]:
# Map dataset links to network links: two links are paired when all four
# endpoint coordinate columns are equal. Both inputs carry a 'link_id' column,
# which the merge suffixes to 'link_id_x' (dataset) and 'link_id_y' (network).
mg = (
    df_links_dataset
    .merge(
        df_links,
        how='left',
        on=['start_node_x', 'start_node_y', 'end_node_x', 'end_node_y'],
    )
    .loc[:, ['link_id_x', 'link_from', 'link_to', 'link_id_y', 'from', 'to']]
    .rename(columns={
        'link_id_x': 'link_id_dataset',
        'link_id_y': 'link_id_network',
        'link_from': 'node_from_dataset',
        'from': 'node_from_network',
        'link_to': 'node_to_dataset',
        'to': 'node_to_network',
    })
)

In [None]:
# Attach the per-link totals to the dataset<->network link mapping.
link_home_work = mg.merge(df_act_agg, how='left', left_on='link_id_network', right_on='link')

# Links without any matched activity get a total of 0. Assign the result back
# explicitly: calling fillna(..., inplace=True) on the column selection is a
# chained in-place update, which does not modify `link_home_work` when pandas
# Copy-on-Write is active and would leave NaNs in place.
link_home_work['go_to_sum'] = link_home_work['go_to_sum'].fillna(0)

# Final lookup table: dataset link id -> total.
link_go_to = link_home_work[['link_id_dataset', 'go_to_sum']]