In [None]:
# === Intra-City Bus Simulation (~11k entries: 3 routes x 10 buses x 365 days) ===
import random, json
from datetime import datetime, timedelta, time
from math import radians, cos, sin, asin, sqrt

# 1️⃣ Helper: Haversine distance in km
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: latitude/longitude of the first point, in decimal degrees.
        lat2, lon2: latitude/longitude of the second point, in decimal degrees.

    Returns:
        Distance in km on a sphere of radius 6371 km.
    """
    R = 6371  # km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make asin() raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return R * c

# 2️⃣ Define routes
# Each route is a dict with:
#   "stops"         - ordered (lat, lon) tuples; the first entry is used as the
#                     start station and the last as the end station.
#   "base_time_min" - scheduled end-to-end travel time in minutes (added to the
#                     departure time to obtain the scheduled arrival).
ROUTES = [
    {
        "stops":[(30.7410,76.7821),(30.7360,76.7845),(30.7255,76.7732),(30.7114,76.7676)],
        "base_time_min":25
    },
    {
        "stops":[(30.7410,76.7821),(30.7062,76.7991),(30.7056,76.8008),(30.7050,76.7950)],
        "base_time_min":30
    },
    {
        "stops":[(30.6989,76.7789),(30.7255,76.7732),(30.7360,76.7845),(30.7410,76.7821)],
        "base_time_min":28
    }
]

BUSES_PER_ROUTE = 10        # more buses per route per day
HISTORICAL_DAYS = 365       # full year history
# NOTE: despite its name, WEATHER_PROBS holds the weather *labels*; the sampling
# weights live in WEATHER_WEIGHTS and are paired with the labels by index.
WEATHER_PROBS = ["Clear","Rainy","Foggy"]
WEATHER_WEIGHTS = [0.8,0.15,0.05]

def format_time(dt):
    """Render a datetime/time value as a zero-padded 24-hour "HH:MM" string."""
    return f"{dt:%H:%M}"

# 3️⃣ Generate schedule & historical dataset
# Seed the RNG so a fresh "Restart & Run All" regenerates the same dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

schedule_records = []
history_records = []
bus_counter = 1

# Snapshot the reference date once. Re-evaluating datetime.now()/today() for
# every record can straddle midnight mid-run and shift dates between records.
reference_date = datetime.now().date()

for route in ROUTES:
    interval = 30           # minutes between consecutive departures
    first_bus_time = 5*60   # 5 AM, expressed in minutes after midnight

    stops = route['stops']
    num_stops = len(stops)
    num_segments = num_stops - 1
    scheduled_duration = timedelta(minutes=route['base_time_min'])

    # Straight-line distance from the first to the last stop. It only depends
    # on the route, so compute it once instead of once per historical record.
    start_lat, start_lon = stops[0]
    end_lat, end_lon = stops[-1]
    dist_km = haversine(start_lat, start_lon, end_lat, end_lon)

    for b in range(BUSES_PER_ROUTE):
        bus_id = f"BUS{bus_counter:03d}"
        dep_min = first_bus_time + b*interval
        # Keep the departure as an offset from midnight rather than a
        # wall-clock time(), so a trip that runs past 24:00 still arrives
        # *after* it departs (time() wraps around and loses the day).
        dep_offset = timedelta(minutes=dep_min)

        # expected times: stops are assumed evenly spaced in time
        sched_dep_dt = datetime.combine(reference_date, time(0, 0)) + dep_offset
        sched_arr_dt = sched_dep_dt + scheduled_duration
        step_secs = scheduled_duration.total_seconds() / num_segments
        exp_stop_times = [
            format_time(sched_dep_dt + timedelta(seconds=step_secs*i))
            for i in range(num_stops)
        ]
        route_stops = [
            {"coordinates": coords, "exp_time": exp_time}
            for coords, exp_time in zip(stops, exp_stop_times)
        ]

        schedule_records.append({
            "bus_id":bus_id,
            "route_stops":route_stops,
            "scheduled_departure_time":format_time(sched_dep_dt),
            "scheduled_arrival_time":format_time(sched_arr_dt),
            "scheduled_time_minutes":route['base_time_min']
        })

        # generate historical entries: one trip per bus per day, going back
        # HISTORICAL_DAYS days from the reference date
        for day_offset in range(HISTORICAL_DAYS):
            service_date = reference_date - timedelta(days=day_offset)
            # Random jitter (minutes) around the scheduled departure/arrival.
            dep_var = random.randint(-2,3)
            arr_var = random.randint(-3,5)

            day_start = datetime.combine(service_date, time(0, 0))
            sched_dep = day_start + dep_offset
            sched_arr = sched_dep + scheduled_duration
            dep_dt_actual = sched_dep + timedelta(minutes=dep_var)
            arr_dt_actual = sched_arr + timedelta(minutes=arr_var)

            weather = random.choices(WEATHER_PROBS, weights=WEATHER_WEIGHTS, k=1)[0]
            hour = dep_dt_actual.hour
            demand = "High" if 7<=hour<=10 or 17<=hour<=20 else "Medium"

            # Actual stop times are interpolated evenly between the actual
            # departure and the actual arrival.
            total_secs_actual = (arr_dt_actual - dep_dt_actual).total_seconds()
            step_secs_actual = total_secs_actual / num_segments
            actual_times = [
                format_time(dep_dt_actual + timedelta(seconds=step_secs_actual*i))
                for i in range(num_stops)
            ]

            stops_in_between = [
                {"coordinates": stop['coordinates'],
                 "exp_time": stop['exp_time'],
                 "actual_time": actual_time}
                for stop, actual_time in zip(route_stops[1:-1], actual_times[1:-1])
            ]

            history_records.append({
                "bus_id":bus_id,
                "date":str(service_date),
                "start_station":{"coordinates":route_stops[0]['coordinates'],
                                 "exp_time":route_stops[0]['exp_time'],
                                 "actual_time":actual_times[0]},
                "end_station":{"coordinates":route_stops[-1]['coordinates'],
                               "exp_time":route_stops[-1]['exp_time'],
                               "actual_time":actual_times[-1]},
                "stops_in_between":stops_in_between,
                "weather":weather,
                "passenger_demand":demand,
                "actual_travel_time_min":total_secs_actual/60,
                "delay_minutes":(arr_dt_actual - sched_arr).total_seconds()/60,
                "distance_km":dist_km,
                "num_stops":num_stops,
                "scheduled_time_minutes":route['base_time_min']
            })

        bus_counter +=1

# Save JSON files: write each record list to its own pretty-printed file.
for out_path, records in (
    ("intra_city_schedule.json", schedule_records),
    ("intra_city_history.json", history_records),
):
    with open(out_path, "w") as f:
        json.dump(records, f, indent=4)

print("✅ Generated intra_city_schedule.json & intra_city_history.json")


✅ Generated intra_city_schedule.json & intra_city_history.json


In [None]:
# === Main XGBoost ETA Model & Interactive ETA Calculator (GPU Enhanced) ===
import pandas as pd
import numpy as np
import json
import joblib
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import xgboost as xgb
import pytz
import requests
from math import radians, cos, sin, asin, sqrt

print("Libraries imported successfully.")

# --- Helper: Haversine distance
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: latitude/longitude of the first point, in decimal degrees.
        lat2, lon2: latitude/longitude of the second point, in decimal degrees.

    Returns:
        Distance in km on a sphere of radius 6371 km.
    """
    R = 6371  # km
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1))*cos(radians(lat2))*sin(dlon/2)**2
    # Rounding can leave `a` a hair outside [0, 1] for (near-)antipodal points;
    # clamp it so asin() never raises a math domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return R * c

# 1️⃣ Load historical dataset
with open("intra_city_history.json", "r") as f:
    history_records = json.load(f)

df = pd.DataFrame(history_records)

# 2️⃣ Extract base features
# Parse the date column once instead of re-parsing it for every derived column.
service_dates = pd.to_datetime(df['date'])
df['month'] = service_dates.dt.month
df['day'] = service_dates.dt.day
df['day_of_week'] = service_dates.dt.weekday

# Station coordinates are stored as [lat, lon] pairs inside nested dicts.
df['start_lat'] = df['start_station'].apply(lambda x: x['coordinates'][0])
df['start_lon'] = df['start_station'].apply(lambda x: x['coordinates'][1])
df['end_lat'] = df['end_station'].apply(lambda x: x['coordinates'][0])
df['end_lon'] = df['end_station'].apply(lambda x: x['coordinates'][1])

# Hour of the actual departure, taken from the "HH:MM" string.
df['hour'] = df['start_station'].apply(lambda x: int(x['actual_time'].split(":")[0]))

# Add distance and number of stops
df['distance_km'] = df.apply(lambda row: haversine(
    row['start_lat'], row['start_lon'], row['end_lat'], row['end_lon']), axis=1)
df['num_stops'] = df['stops_in_between'].apply(lambda x: len(x) + 2)

# 3️⃣ Add engineered features
df['speed_kmph'] = df['distance_km'] / (df['scheduled_time_minutes']/60)
df['peak_hour'] = df['hour'].apply(lambda h: 1 if 8<=h<=10 or 17<=h<=20 else 0)

df['time_per_stop'] = df['scheduled_time_minutes'] / df['num_stops']
# congestion_index is derived from the *observed* delay of the very trip being
# predicted. It is kept on `df` for analysis but deliberately excluded from the
# model inputs below: it leaks the target (actual travel time) into training
# and is unknown when an ETA is requested for a trip that has not finished.
df['congestion_index'] = df['delay_minutes'] / df['scheduled_time_minutes']
df['weekend'] = df['day_of_week'].apply(lambda d: 1 if d>=5 else 0)

X = df[['distance_km','num_stops','scheduled_time_minutes','hour','day_of_week',
        'peak_hour','speed_kmph','time_per_stop','weekend',
        'weather','passenger_demand']]
y = df['actual_travel_time_min']

# One-hot encode categoricals
X = pd.get_dummies(X, columns=['weather','passenger_demand'])

# 4️⃣ Train-test split (80/20, fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5️⃣ Train XGBoost on GPU with regularization
# Hyperparameters are gathered in one dict so they are easy to scan and tune.
# NOTE(review): device='cuda' presumably needs a CUDA-enabled XGBoost build and
# a visible GPU - confirm before running on a CPU-only machine.
xgb_params = {
    "n_estimators": 800,
    "learning_rate": 0.05,
    "max_depth": 6,
    "subsample": 0.85,
    "colsample_bytree": 0.85,
    "reg_lambda": 3.0,
    "reg_alpha": 1.0,
    "min_child_weight": 2,
    "device": 'cuda',
    "random_state": 42,
}
xgb_model = xgb.XGBRegressor(**xgb_params)

print("\nStarting XGBoost model training on GPU...")
xgb_model.fit(X_train, y_train)
print("Model training completed!")

# 6️⃣ Evaluate on the held-out split
y_pred = xgb_model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f"\nModel Evaluation -> MAE: {mae:.2f} min, R²: {r2:.2f}")

# 7️⃣ Save model
model_filename = 'eta_model_xgboost_gpu.joblib'
joblib.dump(xgb_model, model_filename)
print(f"Model saved to '{model_filename}'")

# ------------------------------
# 8️⃣ Interactive ETA Calculator
# ------------------------------
# Timezone (Asia/Kolkata) used to obtain the current local time when an ETA
# is computed.
india_tz = pytz.timezone('Asia/Kolkata')

def get_osrm_eta(origin_coords,destination_coords):
    """Query the public OSRM demo server for the driving duration between two points.

    Args:
        origin_coords: mapping with 'lat' and 'lon' keys for the start point.
        destination_coords: mapping with 'lat' and 'lon' keys for the end point.

    Returns:
        The `duration` of the first route in the OSRM response (callers treat
        it as seconds), or None if the request fails or the response does not
        have the expected shape.
    """
    # OSRM expects "lon,lat" ordering in the URL path.
    origin = f"{origin_coords['lon']},{origin_coords['lat']}"
    destination = f"{destination_coords['lon']},{destination_coords['lat']}"
    url = f"http://router.project-osrm.org/route/v1/driving/{origin};{destination}"
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return response.json()['routes'][0]['duration']
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as err:
        # Catch only network/HTTP/parsing failures; a bare `except:` would also
        # swallow KeyboardInterrupt and hide programming errors.
        print(f"❌ OSRM request failed: {err}")
        return None

def get_live_weather(coords):
    """Fetch current weather from OpenWeatherMap and map it to a training label.

    The API key is read from the OPENWEATHER_API_KEY environment variable;
    credentials must not be committed in the notebook. (Any key that was
    previously hard-coded here should be considered leaked and be rotated.)

    Args:
        coords: mapping with 'lat' and 'lon' keys.

    Returns:
        "Rainy", "Foggy" or "Clear". Falls back to "Clear" (best effort) when
        the key is missing, the request fails, or the response is malformed.
    """
    import os  # local import: `os` is not part of this cell's import list

    weather_api_key = os.environ.get("OPENWEATHER_API_KEY")
    if not weather_api_key:
        print("⚠️ OPENWEATHER_API_KEY is not set; assuming 'Clear' weather.")
        return "Clear"

    try:
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            # Let requests build/escape the query string.
            params={"lat": coords['lat'], "lon": coords['lon'], "appid": weather_api_key},
            timeout=10,  # avoid hanging indefinitely on a stalled connection
        )
        response.raise_for_status()
        weather_condition = response.json()['weather'][0]['main']
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as err:
        # Best-effort lookup: report the problem but keep the ETA flow running.
        print(f"⚠️ Weather lookup failed ({err}); assuming 'Clear' weather.")
        return "Clear"

    if "Rain" in weather_condition: return "Rainy"
    if "Fog" in weather_condition or "Mist" in weather_condition: return "Foggy"
    return "Clear"

def get_final_eta_status(bus_coords,stop_coords,scheduled_time_min,num_stops=2):
    """Print a blended ETA (OSRM + XGBoost model) and an on-time/delayed status.

    Args:
        bus_coords: mapping with 'lat'/'lon' keys for the bus position.
        stop_coords: mapping with 'lat'/'lon' keys for the target stop.
        scheduled_time_min: scheduled travel time in minutes.
        num_stops: number of stops including start and end (default 2).

    Returns:
        None. Results are printed. Returns early (printing nothing further)
        when the OSRM lookup fails.
    """
    now = datetime.now(india_tz)
    osrm_sec = get_osrm_eta(bus_coords,stop_coords)
    if osrm_sec is None: return
    print(f"1️⃣ OSRM ETA (no traffic): {osrm_sec/60:.1f} min")

    weather = get_live_weather(stop_coords)
    print(f"2️⃣ Live Weather: {weather}")

    distance = haversine(bus_coords['lat'], bus_coords['lon'], stop_coords['lat'], stop_coords['lon'])

    hour = now.hour
    weekday = now.weekday()
    # Same demand rule the historical data was generated with; hard-coding
    # "Medium" would feed the model a value inconsistent with rush-hour rows.
    demand = "High" if 7<=hour<=10 or 17<=hour<=20 else "Medium"

    # Build every engineered feature the model was trained on. Any column that
    # is omitted here is silently set to 0 by reindex(), which skews the
    # prediction. Divisions are guarded so degenerate user input cannot crash.
    features = {
        'distance_km':distance,
        'num_stops':num_stops,
        'scheduled_time_minutes':scheduled_time_min,
        'hour':hour,
        'day_of_week':weekday,
        'peak_hour': 1 if 8<=hour<=10 or 17<=hour<=20 else 0,
        'speed_kmph': distance / (scheduled_time_min/60) if scheduled_time_min > 0 else 0.0,
        'time_per_stop': scheduled_time_min / num_stops if num_stops > 0 else 0.0,
        'weekend': 1 if weekday>=5 else 0,
        'weather_'+weather:1,
        'passenger_demand_'+demand:1
    }
    # reindex aligns the sample with the training columns: unknown keys are
    # dropped and any training column not supplied above defaults to 0.
    sample = pd.DataFrame([features]).reindex(columns=X.columns, fill_value=0)

    ai_sec = xgb_model.predict(sample)[0]*60
    print(f"3️⃣ AI Model ETA (historical): {ai_sec/60:.1f} min")

    # Fixed 40/60 blend of the routing-engine and model estimates.
    final_sec = osrm_sec*0.4 + ai_sec*0.6
    print(f"4️⃣ Final Combined ETA: {final_sec/60:.1f} min")

    delay_sec = final_sec-(scheduled_time_min*60)
    arrival = now+timedelta(seconds=float(final_sec))
    # Less than one minute over schedule still counts as on time.
    if delay_sec<60:
        print(f"✅ STATUS: ON TIME, ETA: {arrival.strftime('%Y-%m-%d %I:%M %p')}")
    else:
        print(f"⚠️ STATUS: DELAYED, Delay: {round(delay_sec/60)} min, ETA: {arrival.strftime('%Y-%m-%d %I:%M %p')}")


# Interactive input
# Prompts for a start/end coordinate pair, the scheduled travel time and the
# stop count, then prints the ETA report via get_final_eta_status().
print("\n--- Interactive ETA Calculator ---")
try:
    # float()/int() raise ValueError on non-numeric text, handled below.
    start_lat = float(input("Enter Start Latitude: "))
    start_lon = float(input("Enter Start Longitude: "))
    end_lat = float(input("Enter End Latitude: "))
    end_lon = float(input("Enter End Longitude: "))
    scheduled_time_min = float(input("Enter Scheduled Time (minutes): "))
    num_stops = int(input("Enter number of stops (including start/end): "))

    # The ETA helpers expect coordinates as {'lat': ..., 'lon': ...} mappings.
    start_coords = {'lat':start_lat,'lon':start_lon}
    end_coords = {'lat':end_lat,'lon':end_lon}

    get_final_eta_status(start_coords,end_coords,scheduled_time_min,num_stops=num_stops)
except ValueError:
    print("\nInvalid input. Please enter numbers for coordinates, time, and stops.")
except Exception as e:
    # Catch-all so an unexpected failure is reported instead of a traceback.
    print(f"An error occurred: {e}")

Libraries imported successfully.

Starting XGBoost model training on GPU...
Model training completed!

Model Evaluation -> MAE: 1.49 min, R²: 0.78
Model saved to 'eta_model_xgboost_gpu.joblib'


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


In [None]:
# Save the model in .pkl format
# This uses the same joblib.dump call as the earlier .joblib save, so only the
# file name/extension differs. Requires `xgb_model` from the training cell.
import joblib

model_filename_pkl = 'eta_model_xgboost_gpu.pkl'
joblib.dump(xgb_model, model_filename_pkl)

print(f"Model saved to '{model_filename_pkl}' in .pkl format.")

Model saved to 'eta_model_xgboost_gpu.pkl' in .pkl format.
