In [43]:
import json
import os

# Specify the paths to your data files
data_dir = os.path.expanduser('~/.rc-cli/data/model_apply_inputs/')
route_data_file = os.path.join(data_dir, 'eval_route_data.json')
package_data_file = os.path.join(data_dir, 'eval_package_data.json')
travel_times_file = os.path.join(data_dir, 'eval_travel_times.json')

# Output files
output_dir = './filtered_data/'
os.makedirs(output_dir, exist_ok=True)
filtered_route_data_file = os.path.join(output_dir, 'filtered_route_data.json')
filtered_package_data_file = os.path.join(output_dir, 'filtered_package_data.json')
filtered_travel_times_file = os.path.join(output_dir, 'filtered_travel_times.json')

# Load route_data.json
with open(route_data_file, 'r') as file:
    route_data = json.load(file)

# Extract all station codes
station_codes = set()
for data in route_data.values():
    station_codes.add(data['station_code'])

print("Available Station Codes:")
for code in station_codes:
    print(code)

# Parameters to adjust
TARGET_STATION_CODE = 'DSE5'  # Replace with your station code or leave as None
NUM_ROUTES = 1  # Number of routes to extract
MAX_STOPS_PER_ROUTE = 10  # Maximum number of stops per route

# Filter RouteIDs based on station code or select random routes
if TARGET_STATION_CODE:
    # Filter routes by station code
    selected_route_ids = [rid for rid, data in route_data.items() if data['station_code'] == TARGET_STATION_CODE]
else:
    # If no station code is specified, select random routes
    import random
    selected_route_ids = list(route_data.keys())
    random.shuffle(selected_route_ids)

# Limit to the desired number of routes
selected_route_ids = selected_route_ids[:NUM_ROUTES]

# Function to extract route data with limited stops
def extract_route_data(input_file, selected_route_ids, output_file, max_stops_per_route=None):
    with open(input_file, 'r') as file:
        data = json.load(file)
    selected_data = {}
    for rid in selected_route_ids:
        if rid in data:
            route_info = data[rid]
            if max_stops_per_route is not None and 'stops' in route_info:
                # Limit the number of stops
                stops = route_info['stops']
                stop_ids = list(stops.keys())[:max_stops_per_route]
                route_info['stops'] = {sid: stops[sid] for sid in stop_ids}
            selected_data[rid] = route_info
    with open(output_file, 'w') as file:
        json.dump(selected_data, file)

# Extract and save the filtered route data
extract_route_data(route_data_file, selected_route_ids, filtered_route_data_file, max_stops_per_route=MAX_STOPS_PER_ROUTE)

# Extract selected stops from the filtered route data
with open(filtered_route_data_file, 'r') as file:
    filtered_route_data = json.load(file)

selected_stop_ids = []
for rid, route_info in filtered_route_data.items():
    stops = route_info.get('stops', {})
    selected_stop_ids.extend(stops.keys())

# Function to extract package data for selected stops
def extract_package_data(input_file, selected_route_ids, output_file, selected_stop_ids):
    with open(input_file, 'r') as file:
        data = json.load(file)
    selected_data = {}
    for rid in selected_route_ids:
        if rid in data:
            route_packages = data[rid]
            # Filter packages for selected stops
            selected_packages = {sid: route_packages[sid] for sid in selected_stop_ids if sid in route_packages}
            selected_data[rid] = selected_packages
    with open(output_file, 'w') as file:
        json.dump(selected_data, file)

# Function to extract travel times for selected stops
def extract_travel_times(input_file, selected_route_ids, output_file, selected_stop_ids):
    with open(input_file, 'r') as file:
        data = json.load(file)
    selected_data = {}
    for rid in selected_route_ids:
        if rid in data:
            route_times = data[rid]
            # Filter travel times for selected stops
            filtered_times = {}
            for from_stop in selected_stop_ids:
                if from_stop in route_times:
                    times_to_stops = route_times[from_stop]
                    filtered_times_to_stops = {to_stop: times_to_stops[to_stop] for to_stop in selected_stop_ids if to_stop in times_to_stops}
                    filtered_times[from_stop] = filtered_times_to_stops
            selected_data[rid] = filtered_times
    with open(output_file, 'w') as file:
        json.dump(selected_data, file)

# Extract and save the filtered package data
extract_package_data(package_data_file, selected_route_ids, filtered_package_data_file, selected_stop_ids)

# Extract and save the filtered travel times data
extract_travel_times(travel_times_file, selected_route_ids, filtered_travel_times_file, selected_stop_ids)

print(f"Extracted data for routes: {selected_route_ids}")
print(f"Selected stops: {selected_stop_ids}")
print(f"Filtered data saved in directory: {output_dir}")

Available Station Codes:
DAU1
DCH3
DCH4
DLA8
DSE4
DSE5
DBO6
DLA7
DBO2
DLA9
DCH2
DBO3
DBO1
DLA3
DCH1
DSE2
DLA4
DLA5
Extracted data for routes: ['RouteID_04509bfe-b087-41b4-bdbc-54a467c65220']
Selected stops: ['AD', 'AF', 'AG', 'AI', 'AR', 'BA', 'BE', 'BG', 'BP', 'BT']
Filtered data saved in directory: ./filtered_data/


In [36]:
import json
import os


In [37]:

# Specify the paths to your data files
data_dir = os.path.expanduser('~/.rc-cli/data/model_apply_inputs/')
route_data_file = os.path.join(data_dir, 'eval_route_data.json')
package_data_file = os.path.join(data_dir, 'eval_package_data.json')
travel_times_file = os.path.join(data_dir, 'eval_travel_times.json')

# Output files
output_dir = './filtered_data/'
os.makedirs(output_dir, exist_ok=True)
filtered_route_data_file = os.path.join(output_dir, 'filtered_route_data.json')
filtered_package_data_file = os.path.join(output_dir, 'filtered_package_data.json')
filtered_travel_times_file = os.path.join(output_dir, 'filtered_travel_times.json')


In [38]:
# Load route_data.json
with open(route_data_file, 'r') as file:
    route_data = json.load(file)

# Extract all station codes
station_codes = set()
for data in route_data.values():
    station_codes.add(data['station_code'])

print("Available Station Codes:")
for code in station_codes:
    print(code)

Available Station Codes:
DAU1
DCH3
DCH4
DLA8
DSE4
DSE5
DBO6
DLA7
DBO2
DLA9
DCH2
DBO3
DBO1
DLA3
DCH1
DSE2
DLA4
DLA5


In [39]:

# Parameters to adjust
TARGET_STATION_CODE = 'DSE5'  # Replace with your station code or leave as None
NUM_ROUTES = 1  # Number of routes to extract


In [40]:


# Filter RouteIDs based on station code or select random routes
if TARGET_STATION_CODE:
    # Filter routes by station code
    selected_route_ids = [rid for rid, data in route_data.items() if data['station_code'] == TARGET_STATION_CODE]
else:
    # If no station code is specified, select random routes
    import random
    selected_route_ids = list(route_data.keys())
    random.shuffle(selected_route_ids)

# Limit to the desired number of routes
selected_route_ids = selected_route_ids[:NUM_ROUTES]



In [41]:
# Function to extract data for selected routes
def extract_data(input_file, selected_route_ids, output_file):
    with open(input_file, 'r') as file:
        data = json.load(file)
    selected_data = {rid: data[rid] for rid in selected_route_ids if rid in data}
    with open(output_file, 'w') as file:
        json.dump(selected_data, file)



In [42]:
# Extract and save the filtered data
extract_data(route_data_file, selected_route_ids, filtered_route_data_file)
extract_data(package_data_file, selected_route_ids, filtered_package_data_file)
extract_data(travel_times_file, selected_route_ids, filtered_travel_times_file)

print(f"Extracted data for routes: {selected_route_ids}")
print(f"Filtered data saved in directory: {output_dir}")

KeyboardInterrupt: 