In [3]:
import os

os.chdir("/Users/akhilpartheeban/Documents/ai-route-engine")
print("Current working directory:", os.getcwd())

df = pd.read_csv("data/atlanta_stops.csv")
print("Stops loaded:", len(df))
df.head()


Current working directory: /Users/akhilpartheeban/Documents/ai-route-engine
Stops loaded: 100


Unnamed: 0,latitude,longitude
0,33.76641,-84.400169
1,33.747496,-84.335504
2,33.701687,-84.37858
3,33.748385,-84.473807
4,33.750017,-84.313847


In [4]:
# Cell 1 — Pull full Atlanta road network and generate stops
import pandas as pd
import osmnx as ox

print("Stops loaded:", len(df))

# Atlanta city-wide bounding box (confirmed full coverage)
bbox = {
    "north": 33.9150,
    "south": 33.6000,
    "east": -84.2000,
    "west": -84.6000
}

print("Loading full Atlanta road graph...")
G = ox.graph_from_bbox(
    north=bbox["north"],
    south=bbox["south"],
    east=bbox["east"],
    west=bbox["west"],
    network_type="drive"
)
print("Graph loaded with:", len(G.nodes), "nodes")

print("Snapping stops to road network...")
df['node'] = ox.nearest_nodes(G, df.longitude, df.latitude)

print("Unique snapped stops:", df['node'].nunique())
display(df.head())



Stops loaded: 100
Loading full Atlanta road graph...


  G = ox.graph_from_bbox(


Graph loaded with: 44660 nodes
Snapping stops to road network...
Unique snapped stops: 100


Unnamed: 0,latitude,longitude,node
0,33.76641,-84.400169,7550292110
1,33.747496,-84.335504,68203763
2,33.701687,-84.37858,69280346
3,33.748385,-84.473807,69300752
4,33.750017,-84.313847,6958018021


In [22]:
# Cell 2 — Compute shortest-path road distances between stops

import numpy as np
import networkx as nx
from tqdm import tqdm

stops = df['node'].tolist()
n = len(stops)

dist_lookup = {}

for i in tqdm(range(n), desc="Computing distances"):
    for j in range(n):
        if i != j:
            try:
                dist_lookup[(stops[i], stops[j])] = nx.shortest_path_length(
                    G, stops[i], stops[j], weight='length')
            except:
                dist_lookup[(stops[i], stops[j])] = np.inf

print("Pairs computed:", len(dist_lookup))


Computing distances: 100%|████████████████████| 100/100 [07:17<00:00,  4.37s/it]

Pairs computed: 9900





In [23]:
# Cell 2.5 — Drop unreachable stops (important!)

reachable = []
origin = df['node'].iloc[0]  # choose first as depot

for node in df['node']:
    try:
        nx.shortest_path_length(G, origin, node, weight="length")
        reachable.append(node)
    except:
        pass

print("Reachable stops:", len(reachable))

df = df[df['node'].isin(reachable)].reset_index(drop=True)
print("Filtered stops:", len(df))
df.head()


Reachable stops: 99
Filtered stops: 99


Unnamed: 0,latitude,longitude,node
0,33.76641,-84.400169,7550292110
1,33.747496,-84.335504,68203763
2,33.701687,-84.37858,69280346
3,33.748385,-84.473807,69300752
4,33.750017,-84.313847,6958018021


In [30]:
# Cell 3 — Build ML training dataset: distance + traffic + road class

df_ml = pd.DataFrame([
    {"from": frm, "to": to, "distance_m": d}
    for (frm, to), d in dist_lookup.items() if d < np.inf
])

df_ml["hour"] = np.random.randint(0, 24, size=len(df_ml))
df_ml["road_class"] = pd.cut(df_ml["distance_m"],
                             bins=[0, 5000, 15000, 50000],
                             labels=[0, 1, 2])

traffic = {
    h: (0.6 if 7 <= h <= 9 or 16 <= h <= 18 else
        0.9 if 10 <= h <= 15 else 1.2)
    for h in range(24)
}

df_ml["traffic"] = df_ml["hour"].map(traffic)

BASE_SPEED = 10
df_ml["travel_time_sec"] = df_ml["distance_m"] / (BASE_SPEED * df_ml["traffic"])

df_ml.head()



Unnamed: 0,from,to,distance_m,hour,road_class,traffic,travel_time_sec
0,7550292110,68203763,7706.27,3,1,1.2,642.189167
1,7550292110,69280346,8815.946,3,1,1.2,734.662167
2,7550292110,69300752,8426.432,7,1,0.6,1404.405333
3,7550292110,6958018021,9716.601,23,1,1.2,809.71675
4,7550292110,7848705399,11489.665,11,1,0.9,1276.629444


In [31]:
# Cell 4 — Train ML travel time model

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

features = ["distance_m", "hour", "road_class", "traffic"]
target = "travel_time_sec"

X_train, X_test, y_train, y_test = train_test_split(
    df_ml[features], df_ml[target], test_size=0.2, random_state=42
)

rf = RandomForestRegressor(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)

pred = rf.predict(X_test)
mae = mean_absolute_error(y_test, pred)
print("MAE:", round(mae, 2), "seconds")

pd.DataFrame({"distance": X_test["distance_m"].head(),
              "true": y_test.head().values,
              "pred": pred[:5]})



MAE: 0.75 seconds


Unnamed: 0,distance,true,pred
3107,8764.017,973.779667,972.084256
2157,15990.031,1332.502583,1332.614796
6022,15496.714,1721.857111,1722.205936
9588,5192.36,865.393333,865.248162
5214,11296.099,1882.683167,1882.757811


In [32]:
# Cell 5 — Baseline route: nearest stop by shortest road distance

import random

route_baseline = []
unvisited = stops.copy()

# Start at a random starting stop
current = random.choice(unvisited)
route_baseline.append(current)
unvisited.remove(current)

total_distance_baseline = 0

while unvisited:
    nearest = None
    nearest_dist = float('inf')
    for nxt in unvisited:
        d = dist_lookup.get((current, nxt), float('inf'))
        if d < nearest_dist:
            nearest = nxt
            nearest_dist = d
    
    if nearest is None:
        break  # safety break if stuck
    
    total_distance_baseline += nearest_dist
    current = nearest
    route_baseline.append(current)

    if current in unvisited:   # <— NEW safety check
        unvisited.remove(current)

print("Baseline stops visited:", len(route_baseline))
print("Baseline total distance (m):", round(total_distance_baseline, 2))
route_baseline[:10]


Baseline stops visited: 99
Baseline total distance (m): 261744.42


[69355703,
 495944943,
 69199123,
 69194024,
 69189025,
 10609126363,
 4402667582,
 5416237800,
 3574964397,
 69565753]

In [33]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)


In [35]:
# Cell 6 — FINAL AI Routing Fix (ML + Safety Rules)
from tqdm import tqdm
import numpy as np

stops = df['node'].tolist()
unvisited = stops.copy()[1:]
current = stops[0]
route_ai = [current]
total_time_ai = 0

current_hour = 9

for _ in tqdm(range(len(stops) - 1)):
    best = None
    best_score = float("inf")

    for nxt in unvisited:
        d = dist_lookup.get((current, nxt), np.inf)
        if np.isinf(d):
            continue

        # ML features
        road_class = 0 if d < 5000 else 1 if d < 15000 else 2
        traffic = 0.6 if 7 <= current_hour <= 9 else 0.9
        features = [[d, current_hour, road_class, traffic]]
        eta = rf.predict(features)[0]

        # SAFETY #1 — no weird negatives or infs
        if eta <= 0 or np.isinf(eta) or np.isnan(eta):
            eta = d / 10

        # SAFETY #2 — block dumb shortcuts deeper into disconnected areas
        score = eta + (d * 0.0005)

        if score < best_score:
            best = nxt
            best_score = score

    if best is None:
        break

    route_ai.append(best)
    unvisited.remove(best)
    total_time_ai += best_score
    current = best

print("AI stops visited:", len(route_ai))
print("AI total predicted time (sec):", round(total_time_ai, 2))
route_ai[:10]


100%|███████████████████████████████████████████| 98/98 [00:39<00:00,  2.48it/s]

AI stops visited: 99
AI total predicted time (sec): 43591.47





[7550292110,
 11989827481,
 69250368,
 69565753,
 3574964397,
 5416237800,
 4402667582,
 10609126363,
 69189025,
 69194024]

In [None]:
# Cell 7 — KPI improvement calculation

improvement = (baseline_eta - ai_eta) / baseline_eta * 100
print(f"ETA Improvement: {improvement:.2f}%")
