In [1]:
# 05_granite_input_prep.ipynb — Prepare structured trip summaries
import pandas as pd
import json
from pathlib import Path

# Load the previously generated per-trip summary statistics
trip_summary = pd.read_csv("../data/trip_summary_stats.csv")

# Shorten trip labels by dropping the file extension. The pattern is anchored
# to the end of the string so that a ".csv" occurring elsewhere in a trip name
# is left untouched (a plain substring replace would strip every occurrence).
trip_summary["Trip"] = trip_summary["Trip"].str.replace(r"\.csv$", "", regex=True)

# Columns carried into the Granite input; the order here is the column order
# of the narrowed frame below.
fields = [
    "Trip",
    "Avg_Speed_kmh",
    "Avg_RPM",
    "Avg_Fuel_L/100km",
    "HighRPM_LowSpeed_%",
    "Long_Idle_%",
    "Accel_Events",
    "Brake_Events",
]

# Keep only the selected columns; raises KeyError if any of them is missing
trip_summary = trip_summary[fields]

# Build structured data for Granite
def _detect_patterns(trip):
    """Return the context flags raised by one trip's summary metrics.

    These simple flags are intended to guide the Granite feedback prompts.
    """
    checks = (
        (trip["HighRPM_LowSpeed_%"] > 10, "High RPM at low speed"),
        (trip["Long_Idle_%"] > 15, "Long idling periods"),
        (
            (trip["Accel_Events"] + trip["Brake_Events"]) > 200,
            "Frequent acceleration–brake cycles",
        ),
    )
    return [label for triggered, label in checks if triggered]


def _to_granite_entry(trip):
    """Convert one summary row into the nested record exported for Granite."""
    return {
        "trip_name": trip["Trip"],
        "metrics": {
            "average_speed_kmh": round(trip["Avg_Speed_kmh"], 1),
            "average_rpm": round(trip["Avg_RPM"], 1),
            "average_fuel_l_100km": round(trip["Avg_Fuel_L/100km"], 2),
            # Percentages are passed through exactly as stored in the summary
            "high_rpm_low_speed_percent": trip["HighRPM_LowSpeed_%"],
            "long_idle_percent": trip["Long_Idle_%"],
            # Event counts are coerced to plain ints
            "accel_events": int(trip["Accel_Events"]),
            "brake_events": int(trip["Brake_Events"]),
        },
        "patterns_detected": _detect_patterns(trip),
    }


# One record per trip, in the same order as the rows of trip_summary
granite_inputs = [_to_granite_entry(trip) for _, trip in trip_summary.iterrows()]

# Save all summaries as a JSON file
output_path = Path("../data/granite_trip_summaries.json")

# Serialize fully in memory before touching the file: if an entry turns out
# not to be JSON-serializable, the error is raised here and any existing
# export is left intact instead of being truncated to a partial document.
serialized = json.dumps(granite_inputs, indent=2)

# Explicit encoding so the result does not depend on the platform default
output_path.write_text(serialized, encoding="utf-8")

print(f"✅ Exported {len(granite_inputs)} structured trip summaries to {output_path}")


✅ Exported 75 structured trip summaries to ../data/granite_trip_summaries.json
