In [1]:
# Force PyTorch to use CPU mode
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Retry importing PyTorch Geometric with CPU-only mode
import torch
from torch_geometric.data import HeteroData
import pandas as pd
from pyvis.network import Network

In [2]:
# Reload the datasets
zones_df = pd.read_csv("data/zones.csv")
purposes_df = pd.read_csv("data/purposes.csv")
sample_households_df = pd.read_csv("data/sample_households.csv")
sample_people_df = pd.read_csv("data/sample_people.csv")
od_matrix_df = pd.read_csv("data/od_matrix.csv")
sample_travel_diaries_df = pd.read_csv("data/sample_travel_diaries.csv")

In [3]:

# Function to construct the PyG-compatible heterogeneous graph (CPU mode only)
def construct_starting_graph_pyg(zones_df, purposes_df, sample_households_df, sample_people_df, od_matrix_df):
    """
    Constructs the initial graph using PyTorch Geometric (PyG) with heterogeneous nodes and edges.
    All persons are initially connected to their home (Residential) purpose node with duration=None.
    """

    data = HeteroData()  # Heterogeneous graph for different node types

    # Add Zone Nodes
    data["zone"].x = torch.arange(len(zones_df), dtype=torch.float).view(-1, 1)  # Dummy feature
    zone_id_map = {zone_id: i for i, zone_id in enumerate(zones_df["zone_id"])}

    # Add Purpose Nodes
    data["purpose"].x = torch.tensor(purposes_df["attractiveness_score"].values, dtype=torch.float).view(-1, 1)
    purpose_id_map = {purpose_id: i for i, purpose_id in enumerate(purposes_df["purpose_id"])}

    # Add Household Nodes
    data["household"].x = torch.tensor(sample_households_df[["household_income", "household_size"]].values, dtype=torch.float)
    household_id_map = {hh_id: i for i, hh_id in enumerate(sample_households_df["household_id"])}

    # Add Person Nodes
    data["person"].x = torch.tensor(sample_people_df[["age"]].values, dtype=torch.float).view(-1, 1)  # Single feature (age)
    person_id_map = {p_id: i for i, p_id in enumerate(sample_people_df["person_id"])}

    # Define Zone-Zone Edges (OD Matrix travel times)
    src, dst = [], []
    for _, row in od_matrix_df.iterrows():
        src.append(zone_id_map[row["origin"]])
        dst.append(zone_id_map[row["destination"]])
    data["zone", "travel", "zone"].edge_index = torch.tensor([src, dst], dtype=torch.long)

    # Define Zone-Purpose Edges
    src, dst = [], []
    for _, row in purposes_df.iterrows():
        src.append(zone_id_map[row["zone_id"]])
        dst.append(purpose_id_map[row["purpose_id"]])
    data["zone", "has_purpose", "purpose"].edge_index = torch.tensor([src, dst], dtype=torch.long)

    # Define Household-Zone Edges
    src, dst = [], []
    for _, row in sample_households_df.iterrows():
        src.append(household_id_map[row["household_id"]])
        dst.append(zone_id_map[row["zone_id"]])
    data["household", "located_in", "zone"].edge_index = torch.tensor([src, dst], dtype=torch.long)

    # Define Person-Household Edges
    src, dst = [], []
    for _, row in sample_people_df.iterrows():
        src.append(person_id_map[row["person_id"]])
        dst.append(household_id_map[row["household_id"]])
    data["person", "belongs_to", "household"].edge_index = torch.tensor([src, dst], dtype=torch.long)

    # Define Person-Person Edges (Household relationships)
    src, dst = [], []
    household_members = sample_people_df.groupby("household_id")["person_id"].apply(list).to_dict()
    for household, members in household_members.items():
        for i in range(len(members)):
            for j in range(i + 1, len(members)):
                src.append(person_id_map[members[i]])
                dst.append(person_id_map[members[j]])
                src.append(person_id_map[members[j]])
                dst.append(person_id_map[members[i]])  # Bidirectional
    data["person", "related_to", "person"].edge_index = torch.tensor([src, dst], dtype=torch.long)

    # Define Person-Purpose Initial Edges (All Persons Start at Home)
    src, dst = [], []
    for _, row in sample_people_df.iterrows():
        household_zone = sample_households_df[sample_households_df["household_id"] == row["household_id"]]["zone_id"].values[0]
        home_purpose = f"P_{household_zone}_Residential"
        src.append(person_id_map[row["person_id"]])
        dst.append(purpose_id_map[home_purpose])
    data["person", "performs", "purpose"].edge_index = torch.tensor([src, dst], dtype=torch.long)

    return data


In [4]:
# Construct the PyG-compatible heterogeneous graph (CPU mode only)
starting_graph_pyg = construct_starting_graph_pyg(zones_df, purposes_df, sample_households_df, sample_people_df, od_matrix_df)

# Display the graph structure
starting_graph_pyg

HeteroData(
  zone={ x=[5, 1] },
  purpose={ x=[30, 1] },
  household={ x=[7, 2] },
  person={ x=[21, 1] },
  (zone, travel, zone)={ edge_index=[2, 10] },
  (zone, has_purpose, purpose)={ edge_index=[2, 30] },
  (household, located_in, zone)={ edge_index=[2, 7] },
  (person, belongs_to, household)={ edge_index=[2, 21] },
  (person, related_to, person)={ edge_index=[2, 46] },
  (person, performs, purpose)={ edge_index=[2, 21] }
)

In [5]:
# Map indices to real-world names
person_id_map = {i: p_id for i, p_id in enumerate(sample_people_df["person_id"])}
household_id_map = {i: h_id for i, h_id in enumerate(sample_households_df["household_id"])}
purpose_id_map = {i: f"{row['purpose_type']}_Z{row['zone_id']}" for i, row in purposes_df.iterrows()}

def get_node_name(node_type, index):
    if node_type == "person":
        return person_id_map.get(index, f"Person_{index}")
    elif node_type == "household":
        return household_id_map.get(index, f"Household_{index}")
    elif node_type == "purpose":
        return purpose_id_map.get(index, f"Purpose_{index}")
    return f"{node_type}_{index}"

# Print edges with actual names
for edge_type in starting_graph_pyg.edge_types:
    src_type, _, dst_type = edge_type
    edge_index = starting_graph_pyg[edge_type].edge_index.numpy()

    print(f"ðŸ”¹ {edge_type} Edges:")
    for src, dst in zip(edge_index[0], edge_index[1]):
        print(f"  {get_node_name(src_type, src)} â†’ {get_node_name(dst_type, dst)}")


ðŸ”¹ ('zone', 'travel', 'zone') Edges:
  zone_0 â†’ zone_1
  zone_0 â†’ zone_2
  zone_1 â†’ zone_0
  zone_1 â†’ zone_4
  zone_1 â†’ zone_2
  zone_2 â†’ zone_0
  zone_2 â†’ zone_1
  zone_2 â†’ zone_3
  zone_3 â†’ zone_2
  zone_4 â†’ zone_1
ðŸ”¹ ('zone', 'has_purpose', 'purpose') Edges:
  zone_0 â†’ Residential_ZZ1
  zone_0 â†’ Study_ZZ1
  zone_0 â†’ Work_ZZ1
  zone_0 â†’ Shopping_ZZ1
  zone_0 â†’ Leisure_ZZ1
  zone_0 â†’ Other_ZZ1
  zone_1 â†’ Residential_ZZ2
  zone_1 â†’ Study_ZZ2
  zone_1 â†’ Work_ZZ2
  zone_1 â†’ Shopping_ZZ2
  zone_1 â†’ Leisure_ZZ2
  zone_1 â†’ Other_ZZ2
  zone_2 â†’ Residential_ZZ3
  zone_2 â†’ Study_ZZ3
  zone_2 â†’ Work_ZZ3
  zone_2 â†’ Shopping_ZZ3
  zone_2 â†’ Leisure_ZZ3
  zone_2 â†’ Other_ZZ3
  zone_3 â†’ Residential_ZZ4
  zone_3 â†’ Study_ZZ4
  zone_3 â†’ Work_ZZ4
  zone_3 â†’ Shopping_ZZ4
  zone_3 â†’ Leisure_ZZ4
  zone_3 â†’ Other_ZZ4
  zone_4 â†’ Residential_ZZ5
  zone_4 â†’ Study_ZZ5
  zone_4 â†’ Work_ZZ5
  zone_4 â†’ Shopping_ZZ5
  zone_4 â†’ Leisure_Z

In [6]:
def visualize_pyg_graph_with_zones(data, sample_people_df, sample_households_df, purposes_df, zones_df):
    """
    Creates an interactive PyVis visualization from a PyG HeteroData graph.
    It maps PyG node indices to real-world names (P1, H2, Work_Z3, Z1) for better readability.
    """
    net = Network(notebook=True, height="800px", width="100%", directed=True)
    
    # Define colors for different node types
    color_map = {
        "zone": "red",
        "purpose": "blue",
        "household": "green",
        "person": "purple",
    }

    # Create mappings from index to actual node names
    person_id_map = {i: p_id for i, p_id in enumerate(sample_people_df["person_id"])}
    household_id_map = {i: h_id for i, h_id in enumerate(sample_households_df["household_id"])}
    purpose_id_map = {i: f"{row['purpose_type']}_Z{row['zone_id']}" for i, row in purposes_df.iterrows()}
    zone_id_map = {i: row["zone_id"] for i, row in zones_df.iterrows()}  # Fixing Zone Names

    def get_node_name(node_type, index):
        """ Maps a PyG node index to its real-world ID for visualization. """
        if node_type == "person":
            return person_id_map.get(index, f"Person_{index}")
        elif node_type == "household":
            return household_id_map.get(index, f"Household_{index}")
        elif node_type == "purpose":
            return purpose_id_map.get(index, f"Purpose_{index}")
        elif node_type == "zone":
            return zone_id_map.get(index, f"Zone_{index}")  # Ensure correct zone names
        return f"{node_type}_{index}"

    # Store added nodes to avoid duplicates
    added_nodes = set()

    # Add nodes with actual names
    for node_type in data.node_types:
        num_nodes = data[node_type].num_nodes
        for i in range(num_nodes):
            node_id = get_node_name(node_type, i)
            net.add_node(node_id, label=node_id, color=color_map.get(node_type, "gray"), title=node_type)
            added_nodes.add(node_id)

    # Add edges with readable node names
    for edge_type in data.edge_types:
        src_type, relation, dst_type = edge_type
        edge_index = data[edge_type].edge_index.numpy()

        for src, dst in zip(edge_index[0], edge_index[1]):
            src_id = get_node_name(src_type, src)
            dst_id = get_node_name(dst_type, dst)

            if src_id in added_nodes and dst_id in added_nodes:
                net.add_edge(src_id, dst_id, title=relation, width=1)

    # Enable physics for better layout
    net.toggle_physics(True)

    return net

In [7]:
# Generate an interactive PyVis visualization with Zones, Households, People, and Purposes correctly labeled
net_visual_zones = visualize_pyg_graph_with_zones(starting_graph_pyg, sample_people_df, sample_households_df, purposes_df, zones_df)
net_visual_zones.save_graph("graph_starting.html")

In [8]:
def construct_expected_graph(starting_graph, travel_diaries_df, purposes_df, person_id_map, purpose_id_map):
    """
    Constructs the expected output graph by adding dynamic Person-Purpose edges based on travel diaries.
    Maps person and purpose IDs to numerical indices.
    """
    expected_graph = starting_graph.clone()  # Clone the initial graph to modify

    src, dst, durations, rankings, joint_activities = [], [], [], [], []

    for _, row in travel_diaries_df.iterrows():
        person_id = row["person_id"]
        zone_id = row["zone_id"]
        purpose_name = row["purpose"]
        duration = row["duration"]
        ranking = row["ranking_in_day"]
        joint_activity = row["joint_activity"]

        # Convert person_id to index
        if person_id not in person_id_map:
            continue  # Skip if person ID is not in the map
        person_idx = person_id_map[person_id]

        # Find the correct purpose node index
        purpose_node = purposes_df[(purposes_df["zone_id"] == zone_id) & (purposes_df["purpose_type"] == purpose_name)]
        if purpose_node.empty:
            continue  # Skip if no matching purpose node found
        purpose_id = purpose_node.iloc[0]["purpose_id"]

        if purpose_id not in purpose_id_map:
            continue  # Skip if purpose ID is not in the map
        purpose_idx = purpose_id_map[purpose_id]

        # Add the new Person-Purpose edge
        src.append(person_idx)
        dst.append(purpose_idx)
        durations.append(duration)
        rankings.append(ranking)
        joint_activities.append(joint_activity)

    # Convert lists to PyTorch tensors and add edges
    expected_graph["person", "performs", "purpose"].edge_index = torch.tensor([src, dst], dtype=torch.long)
    expected_graph["person", "performs", "purpose"].duration = torch.tensor(durations, dtype=torch.float)
    expected_graph["person", "performs", "purpose"].ranking = torch.tensor(rankings, dtype=torch.long)
    expected_graph["person", "performs", "purpose"].joint_activity = torch.tensor(joint_activities, dtype=torch.bool)

    return expected_graph

# Generate ID mappings
person_id_map = {p_id: i for i, p_id in enumerate(sample_people_df["person_id"])}
purpose_id_map = {p_id: i for i, p_id in enumerate(purposes_df["purpose_id"])}

# Construct the expected output graph
expected_graph_pyg = construct_expected_graph(starting_graph_pyg, sample_travel_diaries_df, purposes_df, person_id_map, purpose_id_map)

In [9]:
# Generate an interactive PyVis visualization with Zones, Households, People, and Purposes correctly labeled
net_results = visualize_pyg_graph_with_zones(expected_graph_pyg, sample_people_df, sample_households_df, purposes_df, zones_df)
net_results.save_graph("graph_results.html")