# In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import os
from geopy.distance import geodesic

# --- Configuration ---
# Directory that is scanned for input files; every entry whose name ends in
# '.parquet' is queued for processing.
content_dir = '/content'
parquet_files = list(
    filter(lambda name: name.endswith('.parquet'), os.listdir(content_dir))
)

# Bar colours, keyed by route category
route_colors = dict(
    loop="#4B0082",  # Indigo
    open="#FF4500",  # Orange Red
)

# --- Helper functions ---
def extract_route_coordinates(geometry):
    """Return the vertices of *geometry* with the first two ordinates swapped.

    Each vertex ``(c0, c1, ...)`` becomes the tuple ``(c1, c0)``; any further
    ordinates (e.g. elevation) are dropped.
    NOTE(review): presumably the input is stored as (longitude, latitude), so
    the result is (latitude, longitude) -- confirm against the data's CRS.

    Multi-part geometries (objects exposing ``.geoms``) are flattened by
    concatenating the vertices of their parts in order, because such objects
    do not provide ``.coords`` directly.

    Args:
        geometry: A shapely-like geometry, or ``None``.

    Returns:
        A list of ``(c1, c0)`` tuples, or ``None`` when *geometry* is
        ``None``, empty, or made up only of empty parts.
    """
    if geometry is None or geometry.is_empty:
        return None

    parts = getattr(geometry, "geoms", None)
    if parts is not None:
        merged = []
        for part in parts:
            # Empty parts yield None; skip them without breaking the chain.
            merged.extend(extract_route_coordinates(part) or [])
        return merged or None

    return [(coord[1], coord[0]) for coord in geometry.coords]

# --- Process each parquet file ---
# A route whose first and last vertices are at most this many metres apart
# is counted as a loop; everything else is counted as open-ended.
loop_threshold_m = 500

for parquet_file in parquet_files:
    print(f"\nProcessing: {parquet_file}")
    file_path = os.path.join(content_dir, parquet_file)
    gdf = gpd.read_parquet(file_path)

    # geodesic() needs (latitude, longitude) in degrees. Reproject anything
    # that declares a CRS other than WGS84; data without a declared CRS is
    # left as-is because there is nothing to convert from.
    if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    # One boolean per usable route: True when start and stop nearly coincide.
    start_stop_matches = []

    for geom in gdf.geometry:
        coords = extract_route_coordinates(geom)
        # Missing/empty geometries and single-vertex routes have no distinct
        # start and stop, so they are left out of the counts.
        if coords is None or len(coords) < 2:
            continue

        start = coords[0]
        stop = coords[-1]
        same_location = geodesic(start, stop).meters <= loop_threshold_m
        start_stop_matches.append(same_location)

    if not start_stop_matches:
        print(f"No valid routes in {parquet_file}")
        continue

    # Count loops and open-ended routes (True sums as 1)
    n_loops = sum(start_stop_matches)
    n_open = len(start_stop_matches) - n_loops
    print(f"Loops: {n_loops}, Open-ended: {n_open}")

    # --- Plot bar chart ---
    plt.figure(figsize=(6, 6))
    plt.bar(
        ["Loops", "Open-ended"],
        [n_loops, n_open],
        color=[route_colors["loop"], route_colors["open"]],
    )
    plt.ylabel("Number of Routes", fontsize=12)
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()

    # Write the figure next to the inputs, named after the file without its
    # extension.
    file_stem = os.path.splitext(parquet_file)[0]
    output_path = os.path.join(content_dir, f"loops_vs_open_routes_{file_stem}.png")
    plt.savefig(output_path, dpi=150)
    plt.show()
    plt.close()
