In [6]:
import pandas as pd

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Tools for Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Tools for Neural Network and Tuning
import tensorflow as tf # <-- CRITICAL: ADD THIS LINE for callbacks!
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# --- 1. Data Loading and Initial Cleaning ---
# Load dataset, parsing datetime columns
df = pd.read_csv("dataset.csv", parse_dates=[1, 2])
# Renaming column names by index is dangerous; rely on columns 1 and 2 being datetimes.
# Keeping 'store_id' for feature engineering before dropping later.

# --- 2. Feature Engineering ---
# Create the raw time difference (timedelta object)
df['time_taken'] = df['actual_delivery_time'] - df['created_at']

# Convert the timedelta object to the numerical target variable (minutes)
df['time_taken_mins'] = df['time_taken'].dt.total_seconds() / 60

# Extract time-based features from the creation time
df['hours'] = df['created_at'].dt.hour
df['day'] = df['created_at'].dt.dayofweek # Monday=0, Sunday=6


# --- NEW: Historical Average Delivery Time per Store ---
# Calculate the mean delivery time for each store and merge it back
store_avg_time = df.groupby('store_id')['time_taken_mins'].mean().reset_index()
store_avg_time.rename(columns={'time_taken_mins': 'store_avg_delivery_time'}, inplace=True)
df = pd.merge(df, store_avg_time, on='store_id', how='left')

# Fill NaNs (if any new store IDs) with the global mean
global_mean = df['store_avg_delivery_time'].mean()
df['store_avg_delivery_time'].fillna(global_mean, inplace=True)


# --- NEW: Temporal Features (Rush Hour Flags) ---
# 1. Lunch Rush (11:00 to 14:00 inclusive)
df['is_lunch_rush'] = df['hours'].apply(lambda x: 1 if 11 <= x <= 14 else 0)
# 2. Dinner Rush (17:00 to 21:00 inclusive)
df['is_dinner_rush'] = df['hours'].apply(lambda x: 1 if 17 <= x <= 21 else 0)
# 3. Weekend Flag (5=Sat, 6=Sun)
df['is_weekend'] = df['day'].apply(lambda x: 1 if x >= 5 else 0)


# --- 3. Final Cleaning and Encoding ---
# Drop the original datetime columns, the raw timedelta, and the store_id (now that we have its avg time)
df.drop(['time_taken', 'created_at', 'actual_delivery_time', 'store_id'], axis=1, inplace=True)

# Remove rows with any missing data
df.dropna(inplace=True)

# Use One-Hot Encoding for the categorical feature (preferred for non-ordinal features)
df = pd.get_dummies(df, columns=['store_primary_category'], prefix='category', dummy_na=False)

# --- 4. Prepare Data for Modeling ---
y = df['time_taken_mins']
x = df.drop(['time_taken_mins'], axis=1)

# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['store_avg_delivery_time'].fillna(global_mean, inplace=True)


In [7]:
# You need to run this command in your environment once
!pip install keras-tuner



In [8]:
"""# --- 5. Scaling Data for NN ---
# Initialize and fit the scaler ONLY on training data
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)


# --- 6. Define the Hypermodel Function for Keras Tuner ---
def build_model(hp):
    # Get the number of input features
    input_dim = x_train_scaled.shape[1]

    model = Sequential()

    # Input Layer (Fixed)
    model.add(Dense(input_dim, kernel_initializer='normal', activation='relu', input_shape=(input_dim,)))

    # Hyperparameter for the number of hidden layers (1 to 3)
    for i in range(hp.Int('num_layers', 1, 3)):
        # Hyperparameter for the size of each dense layer
        model.add(Dense(
            # Units range from 32 to 256, stepping by 32
            units=hp.Int(f'units_{i}', min_value=32, max_value=256, step=32),
            activation='relu'
        ))

    # Output Layer (Fixed - use 'linear' for regression)
    model.add(Dense(1, activation='linear'))

    # Hyperparameter for the learning rate
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # Compile the model
    adam = Adam(learning_rate=hp_learning_rate)
    model.compile(optimizer=adam, loss='mse', metrics=['mae'])

    return model


# --- 7. Run the Tuning Process ---
print("Starting Hyperparameter Tuning...")

tuner = kt.RandomSearch(
    build_model,
    objective='val_mae',    # Minimize Mean Absolute Error on validation data
    max_trials=10,          # Try 10 different combinations
    executions_per_trial=1, # One run per combination for speed
    directory='keras_tuning',
    project_name='delivery_time_nn_v2'
)

# Stop training early if validation MAE stops improving for 5 epochs
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_mae', patience=5)

# Start the search
# We use a validation split of 20% to guide the tuner
tuner.search(
    x_train_scaled,
    y_train,
    epochs=50,
    validation_split=0.2,
    batch_size=128,
    callbacks=[stop_early],
    verbose=0 # Set verbose to 1 to see more logs
)

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print("\n--- Hyperparameter Tuning Complete ---")
print(f"Best Learning Rate: {best_hps.get('learning_rate')}")
print(f"Best Number of Layers: {best_hps.get('num_layers')}")
# You can print more best_hps here
print("Found best model.")"""

'# --- 5. Scaling Data for NN ---\n# Initialize and fit the scaler ONLY on training data\nscaler = MinMaxScaler()\nx_train_scaled = scaler.fit_transform(x_train)\nx_test_scaled = scaler.transform(x_test)\n\n\n# --- 6. Define the Hypermodel Function for Keras Tuner ---\ndef build_model(hp):\n    # Get the number of input features\n    input_dim = x_train_scaled.shape[1] \n    \n    model = Sequential()\n    \n    # Input Layer (Fixed)\n    model.add(Dense(input_dim, kernel_initializer=\'normal\', activation=\'relu\', input_shape=(input_dim,)))\n\n    # Hyperparameter for the number of hidden layers (1 to 3)\n    for i in range(hp.Int(\'num_layers\', 1, 3)):\n        # Hyperparameter for the size of each dense layer\n        model.add(Dense(\n            # Units range from 32 to 256, stepping by 32\n            units=hp.Int(f\'units_{i}\', min_value=32, max_value=256, step=32),\n            activation=\'relu\'\n        ))\n        \n    # Output Layer (Fixed - use \'linear\' for regressi

In [9]:
# --- 8. Final Training of the Best Model ---
# Retrain the best model with more epochs for the final run
print("\nRetraining final best model for 100 epochs...")
final_model = tuner.hypermodel.build(best_hps)

# Use a longer EarlyStopping patience for final training
stop_final = tf.keras.callbacks.EarlyStopping(monitor='val_mae', patience=10, restore_best_weights=True)

history_final = final_model.fit(
    x_train_scaled,
    y_train,
    epochs=100,
    validation_split=0.2,
    batch_size=128,
    callbacks=[stop_final],
    verbose=1
)

# --- 9. Final Evaluation ---
# 1. Get Prediction
nn_prediction = final_model.predict(x_test_scaled)

# CRITICAL FIX: FLATTEN THE PREDICTIONS
nn_prediction = nn_prediction.flatten()

# 2. Define MAPE function
def MAPE(y_actual, y_predicted):
    epsilon = 1e-10
    y_actual = np.asarray(y_actual)
    mape = np.mean(np.abs((y_actual - y_predicted) / (y_actual + epsilon))) * 100
    return mape

# 3. Calculate all metrics
nn_mse = mean_squared_error(y_test, nn_prediction)
nn_rmse = nn_mse ** 0.5
nn_mae = mean_absolute_error(y_test, nn_prediction)
nn_r2 = r2_score(y_test, nn_prediction)
nn_mape = MAPE(y_test, nn_prediction)

# 4. Print the final results
print("\n--- FINAL NEURAL NETWORK TEST PERFORMANCE (Tuned & Enhanced Features) ---")
print(f'Mean Squared Error (MSE): {nn_mse:.2f}')
print(f'Root Mean Squared Error (RMSE): {nn_rmse:.2f} minutes')
print(f'Mean Absolute Error (MAE): {nn_mae:.2f} minutes')
print(f'R-squared (R2 Score): {nn_r2:.4f}')
print(f'Mean Absolute Percentage Error (MAPE): {nn_mape:.2f}%')
print("-" * 50)


Retraining final best model for 100 epochs...


NameError: name 'tuner' is not defined

after gettinhg new parameter
new

In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# --- 1. Data Loading and Feature Engineering ---

# Load dataset, parsing datetime columns
df = pd.read_csv("dataset.csv", parse_dates=[1, 2])

# Create the target variable (delivery time in minutes)
df['time_taken'] = df['actual_delivery_time'] - df['created_at']
df['time_taken_mins'] = df['time_taken'].dt.total_seconds() / 60

# Extract time-based features
df['hours'] = df['created_at'].dt.hour
df['day'] = df['created_at'].dt.dayofweek # Monday=0, Sunday=6


# --- 2. Advanced Feature Engineering ---

# Historical Average Delivery Time per Store
store_avg_time = df.groupby('store_id')['time_taken_mins'].mean().reset_index()
store_avg_time.rename(columns={'time_taken_mins': 'store_avg_delivery_time'}, inplace=True)
df = pd.merge(df, store_avg_time, on='store_id', how='left')
global_mean = df['store_avg_delivery_time'].mean()
df['store_avg_delivery_time'].fillna(global_mean, inplace=True)

# Temporal Features (Rush Hour Flags)
df['is_lunch_rush'] = df['hours'].apply(lambda x: 1 if 11 <= x <= 14 else 0)
df['is_dinner_rush'] = df['hours'].apply(lambda x: 1 if 17 <= x <= 21 else 0)
df['is_weekend'] = df['day'].apply(lambda x: 1 if x >= 5 else 0)


# --- 3. Final Cleaning and Encoding ---

# Drop original columns and handle NaNs
df.drop(['time_taken', 'created_at', 'actual_delivery_time', 'store_id'], axis=1, inplace=True)
df.dropna(inplace=True)

# One-Hot Encoding for the categorical feature
df = pd.get_dummies(df, columns=['store_primary_category'], prefix='category', dummy_na=False)


# --- 4. Prepare Data for Modeling ---

y = df['time_taken_mins']
x = df.drop(['time_taken_mins'], axis=1)

# Split data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Scaling Data for NN
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)


# --- 5. Build Final Tuned Model ---

input_dim = x_train_scaled.shape[1]
FINAL_LEARNING_RATE = 0.01 # Best Hyperparameter
FINAL_NUM_LAYERS = 2       # Best Hyperparameter

final_model = Sequential()
final_model.add(Dense(input_dim, kernel_initializer='normal', activation='relu', input_shape=(input_dim,)))

# Build 2 hidden layers (as per Best Hyperparameters)
# We will use the average layer size from the search (128 units is a good choice)
final_model.add(Dense(128, activation='relu'))
final_model.add(Dense(64, activation='relu')) # Second layer

# Output Layer
final_model.add(Dense(1, activation='linear'))

# Compile the model with the best learning rate
adam = Adam(learning_rate=FINAL_LEARNING_RATE)
final_model.compile(optimizer=adam, loss='mse', metrics=['mae'])


# --- 6. Final Training and Evaluation ---

print("\nRetraining final tuned model...")

# Use EarlyStopping to prevent overfitting during final training
stop_final = tf.keras.callbacks.EarlyStopping(monitor='val_mae', patience=10, restore_best_weights=True)

history_final = final_model.fit(
    x_train_scaled,
    y_train,
    epochs=100,
    validation_split=0.2,
    batch_size=128,
    callbacks=[stop_final],
    verbose=1
)

# 1. Get Prediction
nn_prediction = final_model.predict(x_test_scaled)
nn_prediction = nn_prediction.flatten() # CRITICAL FIX: Flatten the output

# 2. Define MAPE function
def MAPE(y_actual, y_predicted):
    epsilon = 1e-10
    y_actual = np.asarray(y_actual)
    mape = np.mean(np.abs((y_actual - y_predicted) / (y_actual + epsilon))) * 100
    return mape

# 3. Calculate all metrics
nn_mse = mean_squared_error(y_test, nn_prediction)
nn_rmse = nn_mse ** 0.5
nn_mae = mean_absolute_error(y_test, nn_prediction)
nn_r2 = r2_score(y_test, nn_prediction)
nn_mape = MAPE(y_test, nn_prediction)

# 4. Print the final results
print("\n--- FINAL NEURAL NETWORK TEST PERFORMANCE (Tuned & Enhanced Features) ---")
print(f'Mean Squared Error (MSE): {nn_mse:.2f}')
print(f'Root Mean Squared Error (RMSE): {nn_rmse:.2f} minutes')
print(f'Mean Absolute Error (MAE): {nn_mae:.2f} minutes')
print(f'R-squared (R2 Score): {nn_r2:.4f}')
print(f'Mean Absolute Percentage Error (MAPE): {nn_mape:.2f}%')
print("-" * 50)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['store_avg_delivery_time'].fillna(global_mean, inplace=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Retraining final tuned model...
Epoch 1/100
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 775.4174 - mae: 14.8683 - val_loss: 295.0294 - val_mae: 12.0875
Epoch 2/100
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 639.9382 - mae: 12.3653 - val_loss: 296.6396 - val_mae: 12.2574
Epoch 3/100
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 753.4326 - mae: 12.2508 - val_loss: 291.3465 - val_mae: 11.9785
Epoch 4/100
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 2427.7144 - mae: 13.1940 - val_loss: 294.0204 - val_mae: 12.0251
Epoch 5/100
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 673.4949 - mae: 12.1845 - val_loss: 291.8960 - val_mae: 11.8717
Epoch 6/100
[1m882/882[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 723.9861 - mae: 12.1498 - val_loss: 294.7389 - val_mae: 12.3122
Epoch 7/100


In [11]:
final_model.save('delivery_time_model.h5')
#final_model.save('delivery_time_model', save_format='tf')



In [12]:
import joblib
joblib.dump(scaler, 'fitted_scaler.joblib')

['fitted_scaler.joblib']

In [13]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf

# Placeholder for the artifacts and feature list (Replace with your actual loaded artifacts and list)
# final_model = tf.keras.models.load_model('delivery_time_model.h5')
# fitted_scaler = joblib.load('fitted_scaler.joblib')
# FINAL_COLUMNS = ['market_id', 'subtotal', 'total_items', 'num_distinct_items', ...]

def predict_delivery_time(raw_data: dict, model, scaler, feature_columns, store_categories) -> float:
    """
    Takes raw order data, processes it, and returns the predicted delivery time.

    Args:
        raw_data (dict): Dictionary of raw features for one new order.
        model: The loaded Keras model.
        scaler: The loaded fitted MinMaxScaler.
        feature_columns (list): The list of columns used during model training.
        store_categories (list): List of unique store categories for OHE.

    Returns:
        float: The predicted delivery time in minutes.
    """
    # 1. Convert raw data to DataFrame for processing
    input_df = pd.DataFrame([raw_data])

    # 2. Convert timestamp strings to datetime objects for feature engineering
    # Assuming the API receives 'created_at' as an ISO format string
    input_df['created_at'] = pd.to_datetime(input_df['created_at'])

    # 3. Recreate Time-Based Features (hours, day, rush hours, weekend)
    input_df['hours'] = input_df['created_at'].dt.hour
    input_df['day'] = input_df['created_at'].dt.dayofweek
    input_df['is_lunch_rush'] = input_df['hours'].apply(lambda x: 1 if 11 <= x <= 14 else 0)
    input_df['is_dinner_rush'] = input_df['hours'].apply(lambda x: 1 if 17 <= x <= 21 else 0)
    input_df['is_weekend'] = input_df['day'].apply(lambda x: 1 if x >= 5 else 0)

    # 4. Handle Historical Average Feature
    # NOTE: In a real system, you would query a database for this value,
    # but here we use a placeholder or the global mean used during training.
    # We assume 'store_avg_delivery_time' is part of the raw_data or is retrieved.
    if 'store_avg_delivery_time' not in input_df.columns:
         # Fallback: use the training set's global mean if the store average isn't available
         global_mean_from_training = 45.0 # REPLACE with your actual global mean
         input_df['store_avg_delivery_time'] = global_mean_from_training

    # 5. One-Hot Encoding (OHE)
    # CRITICAL: We use 'reindex' to ensure all OHE columns are present and in the right order
    input_df = pd.get_dummies(input_df, columns=['store_primary_category'])

    # Create a DataFrame template for the final feature set, ensuring column order
    # Note: 'market_id' and other base features must be dropped/selected correctly
    final_features = input_df.reindex(columns=feature_columns, fill_value=0)

    # 6. Scaling
    # Scale the feature vector using the fitted training scaler
    x_scaled = scaler.transform(final_features)

    # 7. Prediction
    prediction = model.predict(x_scaled)[0][0]

    # Return the prediction in minutes
    return float(prediction)

In [14]:
import joblib
import tensorflow as tf
from fastapi import FastAPI
from pydantic import BaseModel
import datetime

# --- 1. Load Artifacts ---
# Replace the placeholders below with the actual file paths
try:
    final_model = tf.keras.models.load_model('delivery_time_model.h5')
    fitted_scaler = joblib.load('fitted_scaler.joblib')

    # CRITICAL: This list must be derived from x.columns after all processing.
    # It ensures the input DataFrame is in the exact order the model expects.
    # Placeholder example: You MUST use your actual list.
    FINAL_COLUMNS = [
        'market_id', 'subtotal', 'total_items', 'num_distinct_items',
        'min_item_price', 'max_item_price', 'total_onshift_partners',
        'total_busy_partners', 'total_outstanding_orders', 'store_avg_delivery_time',
        'hours', 'day', 'is_lunch_rush', 'is_dinner_rush', 'is_weekend',
        # And all your OHE categories, e.g.:
        'category_american', 'category_mexican', 'category_thai',
        # ... all other category columns ...
    ]
    # List of unique categories used to generate the OHE columns
    STORE_CATEGORIES = ['american', 'mexican', 'thai', ...]
    GLOBAL_MEAN_TIME = 45.0 # Replace with your calculated global mean

except Exception as e:
    print(f"Error loading model artifacts: {e}")
    # In a production environment, you would exit here if artifacts fail to load.

# --- 2. Initialize FastAPI ---
app = FastAPI(
    title="Delivery Time Prediction API",
    description="Provides real-time estimates for order delivery based on a tuned Neural Network."
)

Error loading model artifacts: Could not locate function 'mse'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'keras.metrics', 'class_name': 'function', 'config': 'mse', 'registered_name': 'mse'}


In [15]:
# Create a Pydantic model for robust input validation
class OrderInput(BaseModel):
    # Base features
    market_id: float
    created_at: str # Receive as a string (ISO format)
    subtotal: int
    total_items: int
    num_distinct_items: int
    min_item_price: int
    max_item_price: int
    total_onshift_partners: float
    total_busy_partners: float
    total_outstanding_orders: float
    store_primary_category: str

    # Custom feature needed for prediction
    store_avg_delivery_time: float # This should be looked up from a DB by the calling service

# Define the API endpoint
@app.post("/predict_time")
def predict(order_data: OrderInput):
    # Convert the Pydantic model back to a simple dict for your pipeline function
    raw_data = order_data.dict()

    try:
        # Call the core pipeline function
        prediction = predict_delivery_time(
            raw_data=raw_data,
            model=final_model,
            scaler=fitted_scaler,
            feature_columns=FINAL_COLUMNS,
            store_categories=STORE_CATEGORIES
        )

        return {
            "status": "success",
            "estimated_delivery_minutes": round(prediction, 2)
        }

    except Exception as e:
        # Log the error (e.g., to a monitoring service)
        return {"status": "error", "message": f"Prediction failed due to: {str(e)}"}

# The raw predict_delivery_time function you defined needs to be in this same file or imported.
# It should be runnable with the actual artifacts and feature lists defined above.

In [16]:
uvicorn api:app --reload

SyntaxError: invalid syntax (ipython-input-3817926223.py, line 1)