## Include Libraries

In [21]:
import pandas as pd
import numpy as np
import os

## Data processing

In [17]:
NODES_FILE = 'data/hcmc/nodes.csv'
SEGMENTS_FILE = 'data/hcmc/segments.csv'
STREETS_FILE = 'data/hcmc/streets.csv'

# Speed limit substituted wherever 'max_velocity' is missing.
# NOTE(review): presumably km/h -- confirm against the data source.
DEFAULT_MAX_VELOCITY = 50

# Nodes: indexed by '_id'; the source column 'long' is exposed as 'lon'.
nodesDf = pd.read_csv(NODES_FILE, index_col='_id').rename(columns={'long': 'lon'})

# Streets: indexed by '_id', with missing speed limits replaced by the default.
streetsDf = pd.read_csv(STREETS_FILE, index_col='_id')
streetsDf['max_velocity'] = streetsDf['max_velocity'].fillna(DEFAULT_MAX_VELOCITY)

# Segments: indexed by '_id', with the same default for missing speed limits.
segmentsDf = pd.read_csv(SEGMENTS_FILE, index_col='_id')
segmentsDf['max_velocity'] = segmentsDf['max_velocity'].fillna(DEFAULT_MAX_VELOCITY)

In [18]:
# Display the nodes table (indexed by node '_id', with 'lon' and 'lat' columns).
nodesDf

Unnamed: 0_level_0,lon,lat
_id,Unnamed: 1_level_1,Unnamed: 2_level_1
366367223,106.629056,10.804243
366367233,106.709701,10.771110
366367242,106.737189,10.709337
366367274,106.760081,10.854489
366367285,106.721163,10.804994
...,...,...
6202895387,106.647884,10.886330
6202895388,106.649074,10.876678
6203301188,106.700737,10.774919
6203333885,106.699275,10.768892


In [19]:
# Display the streets table after missing 'max_velocity' values were filled.
streetsDf

Unnamed: 0_level_0,level,max_velocity,name,type
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
31096786,1,80.0,Quốc Lộ 1,trunk
32575737,4,50.0,,unclassified
32575794,4,50.0,Chu Văn An,unclassified
32575820,4,50.0,Nguyễn Văn Bá,tertiary
32575823,4,50.0,Nguyễn Thị Nhỏ,tertiary
...,...,...,...,...
656562464,4,50.0,,unclassified
656564397,4,50.0,,unclassified
656850719,4,50.0,,unclassified
656851094,4,50.0,,unclassified


In [20]:
# Display the segments table after missing 'max_velocity' values were filled.
segmentsDf

Unnamed: 0_level_0,created_at,updated_at,s_node_id,e_node_id,length,street_id,max_velocity,street_level,street_name,street_type
_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,2020-10-18T13:26:17.365Z,2020-10-18T13:26:17.365Z,373543511,5468660805,114,31096786,80.0,1,Quốc Lộ 1,trunk
1,2020-10-18T13:26:17.400Z,2020-10-18T13:26:17.400Z,5468660805,5738158916,9,31096786,80.0,1,Quốc Lộ 1,trunk
2,2020-10-18T13:26:17.435Z,2020-10-18T13:26:17.435Z,5738158916,5738158918,23,31096786,80.0,1,Quốc Lộ 1,trunk
3,2020-10-18T13:26:17.444Z,2020-10-18T13:26:17.444Z,5738158918,5738158912,66,31096786,80.0,1,Quốc Lộ 1,trunk
4,2020-10-18T13:26:17.452Z,2020-10-18T13:26:17.452Z,5738158912,5758104203,127,31096786,80.0,1,Quốc Lộ 1,trunk
...,...,...,...,...,...,...,...,...,...,...
84628,2020-10-18T13:30:29.795Z,2020-10-18T13:30:29.795Z,5778600776,411925919,42,658328101,50.0,4,Võ Văn Tần,tertiary
84629,2020-10-18T13:30:29.797Z,2020-10-18T13:30:29.797Z,411925919,3116310151,39,658328101,50.0,4,Võ Văn Tần,tertiary
84630,2020-10-18T13:30:29.799Z,2020-10-18T13:30:29.799Z,3116310151,5778360106,22,658328101,50.0,4,Võ Văn Tần,tertiary
84631,2020-10-18T13:30:29.802Z,2020-10-18T13:30:29.802Z,5778360106,5763168795,37,658328101,50.0,4,Võ Văn Tần,tertiary


In [None]:
def extract_graph_to_txt(nodesDf: pd.DataFrame, segmentsDf: pd.DataFrame, output_filename: str = 'map_graph.txt') -> None:
    """
    Write the road graph held in two DataFrames to a text file.

    File layout (no GPS-trajectory lines are produced, so K is always 0)::

        N M K
        u v limit isOneway            <- one pair of lines per written edge
        2 lon_u lat_u lon_v lat_v

    N is the number of rows in ``nodesDf``. M is the number of edges actually
    written: a segment whose start or end node has no entry in ``nodesDf`` is
    reported on stdout and left out entirely, so the header always matches
    the 2*M edge lines that follow it.

    ``isOneway`` is 0 when ``segmentsDf`` also contains the reverse segment
    (v, u) with the same street name, otherwise 1. Missing street names are
    treated as equal to each other for this comparison.

    :param nodesDf: DataFrame whose index is the node ID, with 'lon' and
        'lat' columns.
    :param segmentsDf: DataFrame with one directed segment per row and the
        columns 's_node_id', 'e_node_id', 'max_velocity' and 'street_name'.
    :param output_filename: Path of the text file to create or overwrite.
    :return: None. The result is the written file plus progress messages
        printed to stdout.
    """
    # 1. Coordinate lookups keyed by node ID (the index of nodesDf).
    lon_map = nodesDf['lon'].to_dict()
    lat_map = nodesDf['lat'].to_dict()

    # 2. Pull the edge attributes out column-wise as plain Python values.
    starts = [int(node_id) for node_id in segmentsDf['s_node_id']]
    ends = [int(node_id) for node_id in segmentsDf['e_node_id']]
    limits = [float(limit) for limit in segmentsDf['max_velocity']]
    # NaN never compares equal to itself, so a missing name is normalised to ''
    # to keep the reverse-segment lookup independent of NaN object identity.
    names = ['' if pd.isna(name) else name for name in segmentsDf['street_name']]

    # 3. Every directed (u, v, street_name) triple, for the reverse lookup.
    directed_segments = set(zip(starts, ends, names))

    # 4. Format the two output lines of every edge whose endpoints are known.
    #    This happens before anything is written so that M in the header
    #    counts only the edges that really end up in the file.
    edge_lines = []
    for u, v, limit, name in zip(starts, ends, limits, names):
        lon_u, lat_u = lon_map.get(u), lat_map.get(u)
        lon_v, lat_v = lon_map.get(v), lat_map.get(v)

        # Safety check: both endpoints must exist in nodesDf.
        if lon_u is None or lat_u is None or lon_v is None or lat_v is None:
            print(f"Warning: Node ID {u} or {v} not found in nodesDf. Skipping edge.")
            continue

        # Bi-directional (0) when the reverse segment exists on the same street.
        isOneway = 0 if (v, u, name) in directed_segments else 1

        # First line of the pair: u, v, limit, isOneway.
        edge_lines.append(f"{u} {v} {limit:.2f} {isOneway}\n")
        # Second line: point count (start and end only), then lon/lat per point.
        edge_lines.append(f"2 {lon_u:.6f} {lat_u:.6f} {lon_v:.6f} {lat_v:.6f}\n")

    N = len(nodesDf)
    M = len(edge_lines) // 2
    K = 0  # size of the GPS trajectory; none is exported here

    # 5. Write the header followed by the 2*M edge lines.
    with open(output_filename, 'w') as f:
        f.write(f"{N} {M} {K}\n")
        f.writelines(edge_lines)

    print(f"✅ Successfully extracted graph with N={N} nodes and M={M} edges to '{output_filename}'")
    print(f"The format includes node IDs (u, v), speed limit, isOneway flag, and polyline coordinates.")

In [None]:
output_file = 'map_graph_output.txt'
PREVIEW_LINES = 20  # how many leading lines of the result to echo below

extract_graph_to_txt(nodesDf, segmentsDf, output_file)

# --- Print the head of the generated file for verification ---
# The file holds two lines per edge, so echoing all of it would flood the
# notebook output; a short preview is enough to eyeball the format.
print(f"\n--- First {PREVIEW_LINES} lines of {output_file} ---")
if os.path.exists(output_file):
    with open(output_file, 'r') as f:
        for line_number, line in enumerate(f):
            if line_number >= PREVIEW_LINES:
                break
            print(line, end='')
else:
    print("File was not created.")

✅ Successfully extracted graph with N=52425 compressed nodes and M=84633 edges to 'map_graph_compressed.txt'
Compressed node IDs range from 1 to 52425.

--- Content of map_graph_compressed.txt ---
52425 84633 0
6768 34397 80.00 1
2 106.601780 10.727718 106.601621 10.726701
34397 37426 80.00 1
2 106.601621 10.726701 106.601607 10.726613
37426 37427 80.00 1
2 106.601607 10.726613 106.601574 10.726401
37427 37425 80.00 1
2 106.601574 10.726401 106.601481 10.725809
37425 44286 80.00 1
2 106.601481 10.725809 106.601277 10.724676
44286 14333 80.00 1
2 106.601277 10.724676 106.601241 10.724474
14333 26161 80.00 1
2 106.601241 10.724474 106.601241 10.724338
26161 26162 80.00 1
2 106.601241 10.724338 106.601305 10.724210
26162 26163 80.00 1
2 106.601305 10.724210 106.601353 10.724132
26163 14329 80.00 1
2 106.601353 10.724132 106.601431 10.724037
5399 45313 50.00 1
2 106.609718 10.860852 106.610603 10.860403
45313 5399 50.00 1
2 106.610603 10.860403 106.609718 10.860852
6753 8011 50.00 0
2 106.