# In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import os
from geopy.distance import geodesic
from math import atan2, degrees

# --- Configuration ---
# Directory that is scanned for input parquet files.
content_dir = '/content'
# os.listdir() returns entries in arbitrary order; sort the matches so the
# files are processed (and their messages printed) in a reproducible order.
parquet_files = sorted(f for f in os.listdir(content_dir) if f.endswith('.parquet'))

# Colors
# Bar color (hex) used for each turn category in the charts.
turn_colors = {
    "straight": "#FFA500",  # Orange
    "left": "#1E90FF",      # Light Blue
    "right": "#2CA02C"      # Green
}

# --- Helper functions ---
def extract_route_coordinates(geometry):
    """Return the geometry's vertices with the first two components swapped.

    Each entry of ``geometry.coords`` is read as ``coord[0], coord[1]`` and
    emitted as the tuple ``(coord[1], coord[0])``; any further components are
    dropped. Presumably this converts (lon, lat) into (lat, lon) -- confirm
    against the data's CRS.

    Returns None when ``geometry`` is None or reports itself as empty.
    """
    usable = geometry is not None and not geometry.is_empty
    if not usable:
        return None

    swapped = []
    for vertex in geometry.coords:
        first, second = vertex[0], vertex[1]
        swapped.append((second, first))
    return swapped

def compute_azimuth(p1, p2):
    """Return the initial bearing from ``p1`` to ``p2`` in degrees, in [0, 360).

    Both points are unpacked as (latitude, longitude) pairs given in degrees.
    The result is measured clockwise from north (0 = north, 90 = east).
    Two identical points give 0.
    """
    start_lat, start_lon = np.radians(p1)
    end_lat, end_lon = np.radians(p2)
    lon_diff = end_lon - start_lon

    # East and north components of the direction towards p2.
    east = np.sin(lon_diff) * np.cos(end_lat)
    north = (
        np.cos(start_lat) * np.sin(end_lat)
        - np.sin(start_lat) * np.cos(end_lat) * np.cos(lon_diff)
    )

    bearing = np.degrees(atan2(east, north))
    # atan2 yields (-180, 180]; shift into the compass range [0, 360).
    return (bearing + 360) % 360

def classify_turns(route, threshold=45):
    """Label the heading change at every interior vertex of a route.

    Args:
        route: Sequence of points accepted by ``compute_azimuth``
            (unpacked there as (lat, lon) in degrees), or None.
        threshold: Largest absolute heading change, in degrees, that is
            still labelled "straight".

    Returns:
        A list of "straight", "left" or "right" labels, one per interior
        vertex, after consecutive duplicate points have been collapsed.
        Empty when ``route`` is None or has fewer than three distinct
        consecutive points.
    """
    if route is None:
        return []

    # A repeated vertex forms a zero-length segment. Its bearing is
    # undefined (compute_azimuth returns 0 for it), so a straight route with a
    # duplicated point would be reported as turning. Collapse repeats first.
    points = []
    for point in route:
        if not points or tuple(point) != tuple(points[-1]):
            points.append(point)

    if len(points) < 3:
        return []

    turn_types = []
    for before, vertex, after in zip(points, points[1:], points[2:]):
        az_in = compute_azimuth(before, vertex)
        az_out = compute_azimuth(vertex, after)
        # Wrap the difference into [-180, 180) so crossing north
        # (e.g. 350 -> 10) counts as +20 rather than -340.
        delta = (az_out - az_in + 180) % 360 - 180
        if abs(delta) <= threshold:
            turn_types.append("straight")
        elif delta > 0:
            # Bearings grow clockwise, so a positive change is a right turn.
            turn_types.append("right")
        else:
            turn_types.append("left")
    return turn_types

# --- Process each parquet file ---
# For every parquet file found in `content_dir`: classify the turns along each
# route geometry, then save and show one grouped bar chart with the number of
# straight / left / right classifications per route.
for parquet_file in parquet_files:
    print(f"\nProcessing: {parquet_file}")
    file_path = os.path.join(content_dir, parquet_file)
    gdf = gpd.read_parquet(file_path)

    all_turn_counts = []  # one [straight, left, right] row per plotted route
    route_labels = []     # x-axis label for each row above

    for idx, geom in enumerate(gdf.geometry):
        coords = extract_route_coordinates(geom)
        # At least three points are needed to measure a heading change.
        if coords is None or len(coords) < 3:
            continue

        turns = classify_turns(coords)
        turn_counts = [turns.count(t) for t in ["straight", "left", "right"]]
        all_turn_counts.append(turn_counts)
        # The label is the geometry's position in the file, not its index value.
        route_labels.append(f"{idx}")

    if not all_turn_counts:
        print(f"No valid routes in {parquet_file}")
        continue

    # Convert to numpy array for easier plotting
    all_turn_counts = np.array(all_turn_counts)
    n_routes = all_turn_counts.shape[0]
    x = np.arange(n_routes)  # Route indices
    width = 0.25  # Bar width

    # --- Plot grouped bar chart ---
    # The three bars of a route sit side by side, centered on its x position.
    plt.figure(figsize=(12, 6))
    plt.bar(x - width, all_turn_counts[:, 0], width, color=turn_colors["straight"], label="Straight")
    plt.bar(x, all_turn_counts[:, 1], width, color=turn_colors["left"], label="Left")
    plt.bar(x + width, all_turn_counts[:, 2], width, color=turn_colors["right"], label="Right")

    plt.xlabel("Routes", fontsize=12)
    plt.ylabel("Number of Turns", fontsize=12)
    plt.xticks(x, route_labels, rotation=45, ha='right')
    plt.legend()
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()

    # Write the chart next to the input files, following `content_dir` rather
    # than a hard-coded path, and drop the extension without a magic slice.
    output_stem = os.path.splitext(parquet_file)[0]
    output_path = os.path.join(content_dir, f"turn_distribution_all_routes_{output_stem}.png")
    plt.savefig(output_path, dpi=150)
    plt.show()
    plt.close()
