In [5]:
import os
from itertools import combinations

import pandas as pd
import requests
from tqdm import tqdm  # Add this
# import time  # Optional

# The key is read from the environment so the secret never lives in the notebook.
# NOTE(security): a literal key used to be committed on this line; treat it as
# leaked and revoke/rotate it in the Google Cloud console.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY")

DIRECTIONS_URL = "https://maps.googleapis.com/maps/api/directions/json"
REQUEST_TIMEOUT_S = 10  # seconds; without a timeout requests.get can block forever


def get_route_data(start_lat, start_lng, end_lat, end_lng):
    """Query the Google Directions API for routes between two coordinates.

    Alternatives are requested and ``departure_time=now`` is sent so the API
    can include ``duration_in_traffic`` where available.

    Args:
        start_lat, start_lng: Latitude / longitude of the origin.
        end_lat, end_lng: Latitude / longitude of the destination.

    Returns:
        A list with one row per returned route, each row being
        ``[start_lat, start_lng, end_lat, end_lng, distance, duration,
        traffic_duration, steps]``. Values come from the first leg only.
        ``distance`` and ``duration`` are the raw ``value`` fields from the API
        (metres and seconds per the API reference); ``traffic_duration`` falls
        back to ``duration`` when the API omits ``duration_in_traffic``;
        ``steps`` is the number of steps in the leg. An empty list is returned
        when the API reports ``ZERO_RESULTS``.

    Raises:
        RuntimeError: If no API key is configured, or the API answers with a
            status other than ``OK`` / ``ZERO_RESULTS`` (e.g. ``REQUEST_DENIED``,
            ``OVER_QUERY_LIMIT``). Previously these were silently treated as
            "no routes".
        requests.RequestException: On network failure, timeout or HTTP error.
    """
    if not API_KEY:
        raise RuntimeError(
            "Google Maps API key is not set; define the GOOGLE_MAPS_API_KEY "
            "environment variable before running this cell."
        )

    # Let requests build and escape the query string instead of an f-string URL.
    params = {
        "origin": f"{start_lat},{start_lng}",
        "destination": f"{end_lat},{end_lng}",
        "alternatives": "true",
        "departure_time": "now",
        "key": API_KEY,
    }
    response = requests.get(DIRECTIONS_URL, params=params, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()
    data = response.json()

    # The API signals quota / auth / input problems through "status" while
    # still returning HTTP 200, so it has to be checked explicitly.
    status = data.get("status", "OK")
    if status == "ZERO_RESULTS":
        return []
    if status != "OK":
        detail = data.get("error_message", "no error_message in response")
        raise RuntimeError(f"Directions API returned {status}: {detail}")

    routes = []
    for route in data.get("routes", []):
        try:
            leg = route["legs"][0]
            distance = leg["distance"]["value"]
            duration = leg["duration"]["value"]
            traffic_duration = leg.get("duration_in_traffic", {}).get("value", duration)
            steps = len(leg["steps"])
        except (KeyError, IndexError, TypeError) as e:
            # A malformed route is skipped so the remaining alternatives survive.
            print(f"⚠️ Error parsing route: {e}")
            continue
        routes.append([
            start_lat, start_lng, end_lat, end_lng,
            distance, duration, traffic_duration, steps,
        ])
    return routes

# 🌐 Southern cities, grouped by state / union territory.
# Each value is a (latitude, longitude) pair.
_south_cities_by_state = {
    "Karnataka": {
        "Bangalore": (12.9716, 77.5946),
        "Mysore": (12.2958, 76.6394),
        "Mangalore": (12.9141, 74.8560),
    },
    "Tamil Nadu": {
        "Chennai": (13.0827, 80.2707),
        "Coimbatore": (11.0168, 76.9558),
        "Madurai": (9.9252, 78.1198),
        "Tirunelveli": (8.7139, 77.7567),
        "Salem": (11.6643, 78.1460),
        "Tiruchirappalli": (10.7905, 78.7047),
    },
    "Kerala": {
        "Thiruvananthapuram": (8.5241, 76.9366),
        "Kochi": (9.9312, 76.2673),
        "Kozhikode": (11.2588, 75.7804),
    },
    "Puducherry": {
        "Puducherry": (11.9139, 79.8145),
    },
    "Telangana": {
        "Hyderabad": (17.3850, 78.4867),
        "Warangal": (17.9784, 79.5941),
        "Nizamabad": (18.6725, 78.0941),
        "Khammam": (17.2473, 80.1514),
        "Karimnagar": (18.4386, 79.1288),
        "Mahbubnagar": (16.7445, 77.9844),
        "Adilabad": (19.6667, 78.5333),
        "Ramagundam": (18.7557, 79.4748),
    },
    "Andhra Pradesh": {
        "Vijayawada": (16.5062, 80.6480),
        "Visakhapatnam": (17.6868, 83.2185),
        "Guntur": (16.3067, 80.4365),
        "Tirupati": (13.6288, 79.4192),
        "Rajahmundry": (17.0005, 81.8040),
        "Kakinada": (16.9891, 82.2475),
        "Nellore": (14.4426, 79.9865),
        "Kadapa": (14.4775, 78.8231),
        "Anantapur": (14.6819, 77.6006),
        "Ongole": (15.5057, 80.0499),
        "Srikakulam": (18.2969, 83.8966),
        "Chittoor": (13.2172, 79.1003),
    },
}

# Flatten to {city name: (lat, lng)}; dict ordering keeps the state-by-state
# sequence, which determines the order in which city pairs are generated.
south_india_cities = {
    city: coords
    for state_cities in _south_cities_by_state.values()
    for city, coords in state_cities.items()
}

# Major cities outside the southern set, used for South ↔ rest-of-India routes.
# Records are (name, latitude, longitude).
_metro_records = (
    ("Mumbai", 19.0760, 72.8777),
    ("Delhi", 28.7041, 77.1025),
    ("Kolkata", 22.5726, 88.3639),
    ("Ahmedabad", 23.0225, 72.5714),
    ("Pune", 18.5204, 73.8567),
    ("Nagpur", 21.1458, 79.0882),
    ("Lucknow", 26.8467, 80.9462),
    ("Jaipur", 26.9124, 75.7873),
    ("Guwahati", 26.1445, 91.7362),
    ("Bhubaneswar", 20.2961, 85.8245),
)

# Same {name: (lat, lng)} shape as the southern-city mapping.
pan_india_metros = {name: (lat, lng) for name, lat, lng in _metro_records}

# South ↔ South: every unordered pair of southern cities, each appearing once.
# Elements are ((name_a, coords_a), (name_b, coords_b)).
south_pairs = [*combinations(south_india_cities.items(), 2)]

# South ↔ metro: each southern city paired with each metro, southern city first.
connectivity_pairs = [
    (south_city, metro)
    for south_city in south_india_cities.items()
    for metro in pan_india_metros.items()
]

# Full work list for the route fetch: South-South pairs first, then South-metro.
all_city_pairs = south_pairs + connectivity_pairs

# Fetch routes for every city pair, showing progress with tqdm.
# A failure for one pair is reported and skipped so the rest still run.
all_routes = []
pair_progress = tqdm(all_city_pairs, desc="🚗 Fetching Routes")
for origin, destination in pair_progress:
    src_name, (start_lat, start_lng) = origin
    dst_name, (end_lat, end_lng) = destination
    try:
        routes = get_route_data(start_lat, start_lng, end_lat, end_lng)
        all_routes += routes
        # Optional throttle to reduce the risk of quota issues: time.sleep(0.5)
    except Exception as e:
        print(f"❌ Error: {src_name} to {dst_name} — {e}")

# Persist the collected rows; column order mirrors the rows built by get_route_data.
route_columns = [
    "start_lat",
    "start_lng",
    "end_lat",
    "end_lng",
    "distance",
    "duration",
    "traffic_duration",
    "steps",
]
df = pd.DataFrame(all_routes, columns=route_columns)
df.to_csv("expanded_routes_data.csv", index=False)  # index=False: no row-number column
print("✅ Data collection complete! Saved as expanded_routes_data.csv")

🚗 Fetching Routes: 100%|██████████| 858/858 [07:46<00:00,  1.84it/s]

✅ Data collection complete! Saved as expanded_routes_data.csv





In [7]:
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Load the collected routes and derive scaled columns in one chain:
# distance / 1000 and durations / 60 (named km and min respectively).
df = pd.read_csv("expanded_routes_data.csv").assign(
    distance_km=lambda routes: routes["distance"] / 1000,
    duration_min=lambda routes: routes["duration"] / 60,
    traffic_duration_min=lambda routes: routes["traffic_duration"] / 60,
)

# Model inputs: scaled route distance and number of steps.
feature_columns = ["distance_km", "steps"]
X = df[feature_columns]

# Target: travel time with traffic, in the scaled (minutes) column.
y = df["traffic_duration_min"]

# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# split identical across reruns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base estimator; the three hyperparameters set here are overridden by the grid.
model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5)

# Exhaustive search over 2 x 2 x 2 settings with 3-fold CV, scored by
# (negated) mean absolute error.
param_grid = {"n_estimators": [100, 200], "learning_rate": [0.1, 0.05], "max_depth": [5, 7]}
grid_search = GridSearchCV(model, param_grid, cv=3, scoring="neg_mean_absolute_error")
grid_search.fit(X_train, y_train)

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# retrained on all of X_train with the winning parameters. Fitting it again
# here would only repeat that training run.
best_model = grid_search.best_estimator_

# Score the tuned model on the held-out split. Both metrics are in the same
# unit as the target column (traffic_duration_min).
y_pred = best_model.predict(X_test)
mae, rmse = (
    mean_absolute_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
)

print(f"Model Performance: MAE = {mae:.2f}, RMSE = {rmse:.2f}")

# Serialize the fitted estimator with pickle.
# NOTE: only unpickle this file from a trusted location; loading a pickle can
# execute arbitrary code.
with open("model.pkl", "wb") as model_file:
    pickle.dump(best_model, model_file)

print("Model saved as model.pkl")

Model Performance: MAE = 60.57, RMSE = 95.51
Model saved as model.pkl
