In [1]:
import json
import os


In [2]:

# Input locations: the three evaluation JSON files read by this notebook
data_dir = os.path.expanduser('~/.rc-cli/data/model_apply_inputs/')
route_data_file, package_data_file, travel_times_file = (
    os.path.join(data_dir, name)
    for name in (
        'eval_route_data.json',
        'eval_package_data.json',
        'eval_travel_times.json',
    )
)

# Output locations: the target directory is created up front if it is missing
output_dir = './filtered_data/'
os.makedirs(output_dir, exist_ok=True)
(
    filtered_route_data_file,
    filtered_package_data_file,
    filtered_travel_times_file,
) = (
    os.path.join(output_dir, name)
    for name in (
        'filtered_route_data.json',
        'filtered_package_data.json',
        'filtered_travel_times.json',
    )
)


In [None]:
# Load the route data file (a JSON object mapping route ID -> route record).
# JSON is UTF-8 by specification, so the encoding is given explicitly instead of
# relying on the platform's default locale encoding.
with open(route_data_file, 'r', encoding='utf-8') as file:
    route_data = json.load(file)

# Collect the distinct station codes across all routes
station_codes = {data['station_code'] for data in route_data.values()}

print("Available Station Codes:")
# A set has no stable iteration order; sort so the listing is identical on every run
for code in sorted(station_codes, key=str):
    print(code)

Available Station Codes:
DSE5
DSE4
DCH4
DSE2
DBO6
DLA7
DLA5
DBO2
DLA3
DBO1
DCH3
DCH2
DBO3
DCH1
DAU1
DLA9
DLA8
DLA4


In [5]:

# Tunable settings for the extraction below.
# Station whose routes are extracted; set to None to pick random routes instead.
TARGET_STATION_CODE = 'DSE5'
# Upper bound on the number of routes that are extracted.
NUM_ROUTES = 100


In [6]:


# Build the candidate list: either every route in random order, or only the
# routes that belong to the requested station (in file order).
if not TARGET_STATION_CODE:
    # No station requested: take all route IDs and shuffle them in place
    import random
    selected_route_ids = list(route_data)
    random.shuffle(selected_route_ids)
else:
    selected_route_ids = []
    for route_id, route_info in route_data.items():
        if route_info['station_code'] == TARGET_STATION_CODE:
            selected_route_ids.append(route_id)

# Keep only the first NUM_ROUTES candidates
selected_route_ids = selected_route_ids[:NUM_ROUTES]



In [7]:
# Function to extract data for selected routes
def extract_data(input_file, selected_route_ids, output_file):
    """Copy the entries of the selected routes from one JSON file into another.

    The input file is parsed as a JSON object keyed by route ID. Every ID in
    ``selected_route_ids`` that is present in that object is copied, in the
    order given, into a new object that is written to ``output_file``.
    IDs that are absent from the input file are skipped without an error.

    Args:
        input_file: Path of the JSON file to read.
        selected_route_ids: Iterable of route IDs (top-level keys) to keep.
        output_file: Path of the JSON file to write; an existing file is
            overwritten.

    Returns:
        None. The result is written to ``output_file``.
    """
    # JSON text is UTF-8 by specification; being explicit avoids decoding the
    # file with whatever the platform's default locale encoding happens to be.
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Missing IDs are dropped on purpose so that one call works for every data file
    selected_data = {rid: data[rid] for rid in selected_route_ids if rid in data}
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(selected_data, file)



In [8]:
# Run the same extraction over each (input, output) file pair
for source_path, target_path in (
    (route_data_file, filtered_route_data_file),
    (package_data_file, filtered_package_data_file),
    (travel_times_file, filtered_travel_times_file),
):
    extract_data(source_path, selected_route_ids, target_path)

# Report which routes were requested and where the filtered files were written
print(f"Extracted data for routes: {selected_route_ids}")
print(f"Filtered data saved in directory: {output_dir}")

Extracted data for routes: ['RouteID_04509bfe-b087-41b4-bdbc-54a467c65220', 'RouteID_0937c1c7-2c3a-4f77-9189-dd72544283ec', 'RouteID_0c074683-4b46-464f-bf77-71396ceef394', 'RouteID_0c52a8d0-fdce-4c98-be32-2312219c940f', 'RouteID_0d9d8038-c63c-442b-a768-0f02a87608bd', 'RouteID_0ddb5856-93ba-4d0c-9746-daa44898c3a7', 'RouteID_105057c0-2006-4ea3-87f0-a7d795438684', 'RouteID_11122fc9-f0f1-4cd4-b50e-cb3a459f72e0', 'RouteID_114d3c5a-d9e6-40d3-b6e8-24c906d570be', 'RouteID_129dbea4-c41d-4db8-87f6-53a31711b190', 'RouteID_15d0ee6e-2e64-4620-bed3-b37821e12228', 'RouteID_15f9a994-d468-44fe-95a8-82ce0ac768e5', 'RouteID_18be12b0-ce68-4fa7-b8fc-a54b924f5afc', 'RouteID_1caef4ef-db1a-47b2-a77a-593b0c5d9fc4', 'RouteID_1d839f0f-e315-4e30-bcf9-b433dd4379fd', 'RouteID_23e1d7c3-f51e-4933-92f3-1a6a14b93d1b', 'RouteID_25dcbccd-2804-41f6-9558-8cdd646c50c8', 'RouteID_28850462-e20a-4177-a475-732cf9e966a4', 'RouteID_2999f68b-dc4d-4005-b34b-1714464e8e68', 'RouteID_2e469d92-af48-4f5d-8aec-efd8ee0991eb', 'RouteID_2fe