In [1]:
import os
import osmnx as ox
import networkx as nx
import pandas as pd
import json

# Ensure compatibility with the networkx and osmnx versions
ox.config(use_cache=True, log_console=True)

# Set pandas display options for convenience (this does not affect the script's functionality)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Source directory containing the original JSON files
source_directory = './json_files'
# Target directory where processed JSON files will be saved
target_directory = './processed_jsons'

# Ensure the target directory exists
os.makedirs(target_directory, exist_ok=True)

def process_json_file(file_path, output_path):
    # Load the JSON data from the file
    with open(file_path, 'r') as file:
        data = json.load(file)

    stops = pd.DataFrame(data['stops'])
    route_ids = data['route']
    # Retrieve the "kpi" data
    kpi_data = data.get('kpi', {})
    request_data = data.get('requests', {})

    # Extract the first stop's coordinates
    first_stop_lat = stops.iloc[0]['latitude']
    first_stop_lon = stops.iloc[0]['longitude']

    # Create a graph from the first stop's location within a 15km radius
    G = ox.graph_from_point((first_stop_lat, first_stop_lon), dist=15000, network_type='drive', simplify=False)

    # Function to get the nearest node to a given point
    def get_nearest_node(lat, lon):
        return ox.distance.nearest_nodes(G, X=lon, Y=lat)

    # Initialize a list to hold our new DataFrame's rows
    new_data = []

    # Map each stop's details by its ID for easy access
    stops_dict = stops.set_index('id').to_dict('index')

    # Function to extract detailed path data, avoiding duplicate nodes
    def extract_path_data(shortest_path, stop_info, next_stop_info, include_last_node=False):
        path_data = []
        nodes_to_include = shortest_path[:-1] if not include_last_node else shortest_path

        for node in nodes_to_include:
            node_lat, node_lon = G.nodes[node]['y'], G.nodes[node]['x']
            path_data.append({
                'node_id': node,
                'order': None,  # Order will be set later
                'latitude': node_lat,
                'longitude': node_lon,
                'is_compulsory_stop': None,
                'name': None,
                'earliest_time': None,
                'latest_time': None,
            })

        if path_data:
            path_data[0].update({
                'is_compulsory_stop': stop_info.get('is_compulsory', None),
                'name': stop_info.get('name', None),
                'earliest_time': stop_info.get('earliest_time', None),
                'latest_time': stop_info.get('latest_time', None),
            })
        if include_last_node and path_data:
            path_data[-1].update({
                'is_compulsory_stop': next_stop_info.get('is_compulsory', None),
                'name': next_stop_info.get('name', None),
                'earliest_time': next_stop_info.get('earliest_time', None),
                'latest_time': next_stop_info.get('latest_time', None),
            })

        return path_data

    # Iterate over route to calculate shortest paths and compile detailed data
    for i, node_id in enumerate(route_ids[:-1]):  # Exclude the last ID
        stop_info = stops_dict.get(node_id, {})
        next_stop_info = stops_dict.get(route_ids[i + 1], {})

        current_node = get_nearest_node(stop_info.get('latitude', 0), stop_info.get('longitude', 0))
        next_node = get_nearest_node(next_stop_info.get('latitude', 0), next_stop_info.get('longitude', 0))

        shortest_path = nx.shortest_path(G, current_node, next_node, weight='length')
        path_segment_data = extract_path_data(shortest_path, stop_info, next_stop_info, include_last_node=(i == len(route_ids) - 2))
        new_data.extend(path_segment_data)

    # Correct the order after all segments have been processed
    for index, item in enumerate(new_data):
        item['order'] = index

    detailed_df = pd.DataFrame(new_data)
    final_structure = {"stops": detailed_df.to_dict('records'), "requests": request_data, "kpi": kpi_data}

    # Save the processed data to the output file
    with open(output_path, 'w') as file:
        json.dump(final_structure, file, indent=4)

# Process each JSON file in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith('.json') and not filename.endswith('instance.json'):
        file_path = os.path.join(source_directory, filename)
        output_path = os.path.join(target_directory, filename)

        # Check if the file already exists in the target directory
        if os.path.exists(output_path):
            print(f"Skipped: {filename}")
            continue  # Skip this file and move to the next one

        process_json_file(file_path, output_path)
        print(f"Processed and saved: {filename}")

  ox.config(use_cache=True, log_console=True)


Skipped: 198_H_19_44_00_comp_perc_0.0_demand_factor_1_node_dist_200_compulsory_stop_dist_200_latest_tf_1.2_proposed_tf_0.5_seed_0_walking_dist_500_util_value_1000_fixed_results.json
Skipped: 198_H_19_44_00_comp_perc_0.2_demand_factor_2_node_dist_200_compulsory_stop_dist_200_latest_tf_1.5_proposed_tf_0.5_seed_0_walking_dist_250_util_value_1000_fixed_results.json
Skipped: 198_H_19_44_00_comp_perc_0.4_demand_factor_1_node_dist_200_compulsory_stop_dist_200_latest_tf_1.2_proposed_tf_0.5_seed_0_walking_dist_250_util_value_1000_fixed_results.json
Skipped: 185_H_15_27_00_comp_perc_0.0_demand_factor_1_node_dist_200_compulsory_stop_dist_200_latest_tf_1.2_proposed_tf_0.5_seed_0_walking_dist_500_util_value_1000_semiflexible_results.json
Skipped: 198_H_19_44_00_comp_perc_1.0_demand_factor_1_node_dist_200_compulsory_stop_dist_200_latest_tf_1.5_proposed_tf_0.5_seed_0_walking_dist_100_util_value_1000_fixed_results.json
Skipped: 198_H_19_44_00_comp_perc_0.2_demand_factor_3_node_dist_200_compulsory_stop

In [2]:
# Get Time of Compulsory Stops (arrival at compulsory stop) into stops.

In [3]:
import os
import json

source_folder = 'processed_jsons'
destination_folder = 'destination_folder'

# Create the destination folder if it doesn't exist
if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)

# Process each JSON file in the source folder
for filename in os.listdir(source_folder):
    if filename.endswith('.json'):
        source_path = os.path.join(source_folder, filename)
        destination_path = os.path.join(destination_folder, filename)
        
        # Load the JSON data
        with open(source_path, 'r') as file:
            data = json.load(file)
        
        # Check if 'arrival_times_at_compulsory_stops' exists in 'kpi'
        if 'kpi' in data and 'arrival_times_at_compulsory_stops' in data['kpi']:
            arrival_times = data['kpi']['arrival_times_at_compulsory_stops']
            compulsory_stop_index = 0
            
            # Iterate over the stops and add arrival time to compulsory stops
            for stop in data['stops']:
                if stop['is_compulsory_stop']:
                    if compulsory_stop_index < len(arrival_times):
                        stop['arrival_time'] = arrival_times[compulsory_stop_index]
                        compulsory_stop_index += 1
            
            # Remove 'arrival_times_at_compulsory_stops' from 'kpi'
            del data['kpi']['arrival_times_at_compulsory_stops']
        
        # Save the modified data to the new file in the destination folder
        with open(destination_path, 'w') as file:
            json.dump(data, file, indent=4)

print("Processing complete.")

Processing complete.


# Import necessary Packages and Settings

In [2]:
import osmnx as ox
import networkx as nx
import pandas as pd
import json

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Parse Json Data

In [3]:
# Define the path to the file
file_path = 'public/data/scenarios/scenario_1.json'

# Load the JSON data from the file
with open(file_path, 'r') as file:
    data = json.load(file)

stops = pd.DataFrame(data['stops'])
route_ids = data['route']

## Create Graph from first point

In [4]:
# Extract the first stop's coordinates
first_stop_lat = stops.iloc[0]['latitude']
first_stop_lon = stops.iloc[0]['longitude']

# Create a graph from the first stop's location within a 15km radius
G = ox.graph_from_point((first_stop_lat, first_stop_lon), dist=15000, network_type='drive', simplify=False)
# G = G.to_undirected()

## Get Path

In [5]:
# Map each stop's details by its ID for easy access
stops_dict = stops.set_index('id').to_dict('index')

# Function to get the nearest node to a given point
def get_nearest_node(lat, lon):
    return ox.distance.nearest_nodes(G, X=lon, Y=lat)

# Initialize a list to hold our new DataFrame's rows
new_data = []

# Function to extract detailed path data, avoiding duplicate nodes
def extract_path_data(shortest_path, stop_info, next_stop_info, include_last_node=False):
    path_data = []
    # Include the last node only if specified (for the last segment in the route)
    nodes_to_include = shortest_path[:-1] if not include_last_node else shortest_path

    for node in nodes_to_include:
        node_lat, node_lon = G.nodes[node]['y'], G.nodes[node]['x']
        path_data.append({
            'node_id': node,
            'order': None,  # Order will be set later based on the sequence in new_data
            'latitude': node_lat,
            'longitude': node_lon,
            'is_compulsory_stop': None,
            'name': None,
            'earliest_time': None,
            'latest_time': None,
        })

    # Set additional info for the first node of the path segment
    if path_data:
        path_data[0]['is_compulsory_stop'] = stop_info.get('is_compulsory', None)
        path_data[0]['name'] = stop_info.get('name', None)
        path_data[0]['earliest_time'] = stop_info.get('earliest_time', None)
        path_data[0]['latest_time'] = stop_info.get('latest_time', None)
    if include_last_node and path_data:
        path_data[-1]['is_compulsory_stop'] = next_stop_info.get('is_compulsory', None)
        path_data[-1]['name'] = next_stop_info.get('name', None)
        path_data[-1]['earliest_time'] = next_stop_info.get('earliest_time', None)
        path_data[-1]['latest_time'] = next_stop_info.get('latest_time', None)

    return path_data

# Iterate over route to calculate shortest paths and compile detailed data
for i, node_id in enumerate(route_ids[:-1]):  # Exclude the last ID since it has no next node to connect
    stop_info = stops_dict.get(node_id, {})
    next_stop_info = stops_dict.get(route_ids[i + 1], {})

    # Get nearest graph nodes for the current and next stop
    current_node = get_nearest_node(stop_info.get('latitude', 0), stop_info.get('longitude', 0))
    next_node = get_nearest_node(next_stop_info.get('latitude', 0), next_stop_info.get('longitude', 0))

    # Calculate the shortest path
    shortest_path = nx.shortest_path(G, current_node, next_node, weight='length')
    
    # Extract detailed path data, specify to include the last node only for the last segment
    path_segment_data = extract_path_data(shortest_path, stop_info, next_stop_info, include_last_node=(i == len(route_ids) - 2))
    new_data.extend(path_segment_data)

# Correct the order after all segments have been processed
for index, item in enumerate(new_data):
    item['order'] = index

# Create the DataFrame
detailed_df = pd.DataFrame(new_data)

In [6]:
detailed_df.head(6)

Unnamed: 0,node_id,order,latitude,longitude,is_compulsory_stop,name,earliest_time,latest_time
0,11658141,0,48.106733,11.627877,True,Emdenstraße,06:14:30,06:14:30
1,1496695591,1,48.106939,11.627808,,,,
2,674385779,2,48.107121,11.62774,,,,
3,31797854,3,48.107176,11.627721,,,,
4,27186467,4,48.107528,11.627552,,,,
5,31797680,5,48.107586,11.627526,,,,


## Save Dataframe to Json File

In [44]:
# Convert the DataFrame to a JSON string with the desired structure
json_str = detailed_df.to_json(orient='records')

# Parse the JSON string back into a Python list of dictionaries
records = json.loads(json_str)

# Wrap this list into the desired outer structure with the "stops" key
final_structure = {"stops": records}

# Convert this final structure back into a JSON string
final_json_str = json.dumps(final_structure, indent=4)

# Save the final JSON string to a file
with open('./public/data/scenarios/output.json', 'w') as file:
    file.write(final_json_str)