In [3]:
# !pip install tensorflow 

In [4]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import json
import joblib
from pathlib import Path

# Deep learning libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

2025-11-20 16:27:39.371713: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-20 16:27:39.371961: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-20 16:27:39.409944: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-20 16:27:40.461072: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off,

TensorFlow version: 2.20.0
GPU Available: []


E0000 00:00:1763630861.577992    5853 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1763630861.584689    5853 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [5]:
# Define paths
BASE_PATH = '/home/sirin/BIGDATA'
PICKUP_FILE = os.path.join(BASE_PATH, 'pickup_five_cities.csv')
DELIVERY_FILE = os.path.join(BASE_PATH, 'delivery_five_cities.csv')
OUTPUT_PATH = os.path.join(BASE_PATH, 'quick-result')

# Create output directory if not exists
os.makedirs(OUTPUT_PATH, exist_ok=True)

print(f"Pickup file: {PICKUP_FILE}")
print(f"Delivery file: {DELIVERY_FILE}")
print(f"Output path: {OUTPUT_PATH}")
print(f"Output directory exists: {os.path.exists(OUTPUT_PATH)}")

Pickup file: /home/sirin/BIGDATA/pickup_five_cities.csv
Delivery file: /home/sirin/BIGDATA/delivery_five_cities.csv
Output path: /home/sirin/BIGDATA/quick-result
Output directory exists: True


In [6]:
# Load data with chunking to handle large files
print("Loading pickup data...")
pickup_df = pd.read_csv(PICKUP_FILE, dtype={'from_city_name': 'category', 'typecode': 'category'})
print(f"Pickup data shape: {pickup_df.shape}")
print(f"Pickup columns: {pickup_df.columns.tolist()}")
print(pickup_df.head())

print("\nLoading delivery data...")
delivery_df = pd.read_csv(DELIVERY_FILE, dtype={'from_city_name': 'category', 'typecode': 'category'})
print(f"Delivery data shape: {delivery_df.shape}")
print(f"Delivery columns: {delivery_df.columns.tolist()}")
print(delivery_df.head())

Loading pickup data...
Pickup data shape: (531115, 19)
Pickup columns: ['order_id', 'from_dipan_id', 'from_city_name', 'delivery_user_id', 'accept_time', 'book_start_time', 'expect_got_time', 'poi_lng', 'poi_lat', 'aoi_id', 'typecode', 'got_time', 'got_gps_time', 'got_gps_lng', 'got_gps_lat', 'accept_gps_time', 'accept_gps_lng', 'accept_gps_lat', 'ds']
                           order_id                     from_dipan_id  \
0  8602ab9c07a2c499b52688a743b6e4bc  4221488252e767b1d0bc82a72053d43d   
1  d35cb23e9a4ebd887d85add85efb1db4  3bde1f883391f1bebb487666edea1776   
2  1bf13df5b9cfabe528ff11e252989bc2  8d07525f08cd28bb54e44805dd5d66c7   
3  a85d1f24fda72f6b863585d58092f9a6  255ab20fe675f4bf9e2907b45f940b81   
4  3e6cac7405dc999a17655505396f283c  0734207f2cdd486857508c2ab8a9e05f   

  from_city_name                  delivery_user_id     accept_time  \
0            杭州市  135bdc6219e9f1b5867d0d3cbfc159e5  03-19 07:25:00   
1            杭州市  4557a4c27997fe5817136fc1023f741e  03-19 07:55:00

In [7]:
# Data preprocessing for pickup
def preprocess_pickup_data(df):
    """Preprocess pickup data"""
    df_copy = df.copy()
    
    # Convert time columns
    for col in ['accept_time', 'book_start_time', 'expect_got_time', 'got_time']:
        if col in df_copy.columns:
            df_copy[col] = pd.to_datetime(df_copy[col], format='%m-%d %H:%M:%S', errors='coerce')
    
    # Extract features from timestamps
    if 'accept_time' in df_copy.columns:
        df_copy['accept_hour'] = df_copy['accept_time'].dt.hour
        df_copy['accept_minute'] = df_copy['accept_time'].dt.minute
    
    # Handle geographic coordinates
    df_copy['poi_lng'] = pd.to_numeric(df_copy['poi_lng'], errors='coerce')
    df_copy['poi_lat'] = pd.to_numeric(df_copy['poi_lat'], errors='coerce')
    
    # Fill missing values
    numeric_cols = df_copy.select_dtypes(include=[np.number]).columns
    df_copy[numeric_cols] = df_copy[numeric_cols].fillna(df_copy[numeric_cols].median())
    
    return df_copy

def preprocess_delivery_data(df):
    """Preprocess delivery data"""
    df_copy = df.copy()
    
    # Convert time columns
    for col in ['receipt_time', 'sign_time']:
        if col in df_copy.columns:
            df_copy[col] = pd.to_datetime(df_copy[col], format='%m-%d %H:%M:%S', errors='coerce')
    
    # Extract features from timestamps
    if 'receipt_time' in df_copy.columns:
        df_copy['receipt_hour'] = df_copy['receipt_time'].dt.hour
        df_copy['receipt_minute'] = df_copy['receipt_time'].dt.minute
    
    # Handle geographic coordinates
    df_copy['poi_lng'] = pd.to_numeric(df_copy['poi_lng'], errors='coerce')
    df_copy['poi_lat'] = pd.to_numeric(df_copy['poi_lat'], errors='coerce')
    
    # Fill missing values
    numeric_cols = df_copy.select_dtypes(include=[np.number]).columns
    df_copy[numeric_cols] = df_copy[numeric_cols].fillna(df_copy[numeric_cols].median())
    
    return df_copy

print("Preprocessing data...")
pickup_processed = preprocess_pickup_data(pickup_df)
delivery_processed = preprocess_delivery_data(delivery_df)

print(f"Processed pickup data shape: {pickup_processed.shape}")
print(f"Processed delivery data shape: {delivery_processed.shape}")
print("\nProcessed pickup sample:")
print(pickup_processed.head())

Preprocessing data...
Processed pickup data shape: (531115, 21)
Processed delivery data shape: (472419, 17)

Processed pickup sample:
                           order_id                     from_dipan_id  \
0  8602ab9c07a2c499b52688a743b6e4bc  4221488252e767b1d0bc82a72053d43d   
1  d35cb23e9a4ebd887d85add85efb1db4  3bde1f883391f1bebb487666edea1776   
2  1bf13df5b9cfabe528ff11e252989bc2  8d07525f08cd28bb54e44805dd5d66c7   
3  a85d1f24fda72f6b863585d58092f9a6  255ab20fe675f4bf9e2907b45f940b81   
4  3e6cac7405dc999a17655505396f283c  0734207f2cdd486857508c2ab8a9e05f   

  from_city_name                  delivery_user_id         accept_time  \
0            杭州市  135bdc6219e9f1b5867d0d3cbfc159e5 1900-03-19 07:25:00   
1            杭州市  4557a4c27997fe5817136fc1023f741e 1900-03-19 07:55:00   
2            烟台市  42e6bc48a505ffb4ff9e799cb70fb372 1900-03-19 13:11:00   
3            杭州市  32bdfe340b8be76dbf9435c35e87754b 1900-03-19 10:14:00   
4            上海市  f133f7a7cedf0ac38a12c1582d12201d 1900-0

In [8]:
# Calculate delivery efficiency as target variable
def calculate_efficiency_metrics(pickup_df, delivery_df):
    """Calculate efficiency metrics from pickup and delivery data"""
    
    # Distance calculation using Haversine formula approximation
    pickup_df['distance'] = np.sqrt(
        (pickup_df['poi_lng'] - pickup_df['poi_lng'].mean())**2 + 
        (pickup_df['poi_lat'] - pickup_df['poi_lat'].mean())**2
    )
    
    delivery_df['distance'] = np.sqrt(
        (delivery_df['poi_lng'] - delivery_df['poi_lng'].mean())**2 + 
        (delivery_df['poi_lat'] - delivery_df['poi_lat'].mean())**2
    )
    
    # Normalize distances
    pickup_df['distance_normalized'] = (pickup_df['distance'] - pickup_df['distance'].min()) / (pickup_df['distance'].max() - pickup_df['distance'].min() + 1e-8)
    delivery_df['distance_normalized'] = (delivery_df['distance'] - delivery_df['distance'].min()) / (delivery_df['distance'].max() - delivery_df['distance'].min() + 1e-8)
    
    # Efficiency score (lower is better - normalized distance)
    pickup_df['efficiency_score'] = pickup_df['distance_normalized']
    delivery_df['efficiency_score'] = delivery_df['distance_normalized']
    
    return pickup_df, delivery_df

print("Calculating efficiency metrics...")
pickup_processed, delivery_processed = calculate_efficiency_metrics(pickup_processed, delivery_processed)

print(f"Efficiency score range (pickup): {pickup_processed['efficiency_score'].min():.4f} - {pickup_processed['efficiency_score'].max():.4f}")
print(f"Efficiency score range (delivery): {delivery_processed['efficiency_score'].min():.4f} - {delivery_processed['efficiency_score'].max():.4f}")

Calculating efficiency metrics...
Efficiency score range (pickup): 0.0000 - 1.0000
Efficiency score range (delivery): 0.0000 - 1.0000


In [9]:
# Prepare features for deep learning model
def prepare_features_for_model(df, type_='pickup'):
    """Prepare features for neural network"""
    features = []
    
    # Numeric features
    if type_ == 'pickup':
        time_features = ['accept_hour', 'accept_minute'] if 'accept_hour' in df.columns else []
    else:
        time_features = ['receipt_hour', 'receipt_minute'] if 'receipt_hour' in df.columns else []
    
    numeric_features = ['poi_lng', 'poi_lat', 'distance_normalized'] + time_features
    numeric_features = [f for f in numeric_features if f in df.columns]
    
    # Categorical features
    categorical_features = ['from_city_name'] if 'from_city_name' in df.columns else []
    
    features = numeric_features + categorical_features
    return features

pickup_features = prepare_features_for_model(pickup_processed, 'pickup')
delivery_features = prepare_features_for_model(delivery_processed, 'delivery')

print(f"Pickup features: {pickup_features}")
print(f"Delivery features: {delivery_features}")

Pickup features: ['poi_lng', 'poi_lat', 'distance_normalized', 'accept_hour', 'accept_minute', 'from_city_name']
Delivery features: ['poi_lng', 'poi_lat', 'distance_normalized', 'receipt_hour', 'receipt_minute', 'from_city_name']


In [10]:
# Encode categorical features and prepare training data
def prepare_training_data(df, features, test_size=0.2):
    """Prepare training and testing datasets"""
    df_copy = df.copy()
    
    # Encode categorical variables
    categorical_cols = [f for f in features if df_copy[f].dtype == 'category' or df_copy[f].dtype == 'object']
    encoders = {}
    
    for col in categorical_cols:
        le = LabelEncoder()
        df_copy[col] = le.fit_transform(df_copy[col].astype(str))
        encoders[col] = le
    
    # Prepare X and y
    X = df_copy[features].values.astype(np.float32)
    y = df_copy['efficiency_score'].values.astype(np.float32)
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=test_size, random_state=42
    )
    
    return X_train, X_test, y_train, y_test, scaler, encoders

print("Preparing training data...")
X_train_pickup, X_test_pickup, y_train_pickup, y_test_pickup, scaler_pickup, encoders_pickup = prepare_training_data(
    pickup_processed, pickup_features
)

X_train_delivery, X_test_delivery, y_train_delivery, y_test_delivery, scaler_delivery, encoders_delivery = prepare_training_data(
    delivery_processed, delivery_features
)

print(f"Pickup training data: X_train {X_train_pickup.shape}, y_train {y_train_pickup.shape}")
print(f"Delivery training data: X_train {X_train_delivery.shape}, y_train {y_train_delivery.shape}")

Preparing training data...
Pickup training data: X_train (424892, 6), y_train (424892,)
Delivery training data: X_train (377935, 6), y_train (377935,)
Pickup training data: X_train (424892, 6), y_train (424892,)
Delivery training data: X_train (377935, 6), y_train (377935,)


In [11]:
# Build deep neural network model
def build_deep_learning_model(input_dim, model_name='route_optimizer'):
    """Build a deep neural network for route optimization"""
    
    inputs = layers.Input(shape=(input_dim,), name=f'{model_name}_input')
    
    # First dense block
    x = layers.Dense(128, activation='relu', name=f'{model_name}_dense_1')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    
    # Second dense block
    x = layers.Dense(64, activation='relu', name=f'{model_name}_dense_2')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    
    # Third dense block
    x = layers.Dense(32, activation='relu', name=f'{model_name}_dense_3')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    
    # Fourth dense block
    x = layers.Dense(16, activation='relu', name=f'{model_name}_dense_4')(x)
    x = layers.Dropout(0.2)(x)
    
    # Output layer
    outputs = layers.Dense(1, activation='sigmoid', name=f'{model_name}_output')(x)
    
    model = Model(inputs=inputs, outputs=outputs, name=model_name)
    return model

print("Building deep learning models...")
pickup_model = build_deep_learning_model(X_train_pickup.shape[1], 'pickup_optimizer')
delivery_model = build_deep_learning_model(X_train_delivery.shape[1], 'delivery_optimizer')

# Compile models
pickup_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae', 'mse']
)

delivery_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae', 'mse']
)

print("\nPickup Model:")
pickup_model.summary()

print("\nDelivery Model:")
delivery_model.summary()

Building deep learning models...

Pickup Model:



Delivery Model:


In [12]:
# Train models
print("Training pickup model...")
history_pickup = pickup_model.fit(
    X_train_pickup, y_train_pickup,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    verbose=1,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)
    ]
)

print("\nTraining delivery model...")
history_delivery = delivery_model.fit(
    X_train_delivery, y_train_delivery,
    batch_size=32,
    epochs=20,
    validation_split=0.2,
    verbose=1,
    callbacks=[
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
        keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)
    ]
)

Training pickup model...
Epoch 1/20
Epoch 1/20
[1m10623/10623[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - loss: 0.0030 - mae: 0.0355 - mse: 0.0030 - val_loss: 4.1958e-04 - val_mae: 0.0174 - val_mse: 4.1958e-04 - learning_rate: 0.0010
Epoch 2/20
[1m10623/10623[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - loss: 0.0030 - mae: 0.0355 - mse: 0.0030 - val_loss: 4.1958e-04 - val_mae: 0.0174 - val_mse: 4.1958e-04 - learning_rate: 0.0010
Epoch 2/20
[1m10623/10623[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - loss: 0.0012 - mae: 0.0255 - mse: 0.0012 - val_loss: 3.5802e-04 - val_mae: 0.0154 - val_mse: 3.5802e-04 - learning_rate: 0.0010
Epoch 3/20
[1m10623/10623[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 2ms/step - loss: 0.0012 - mae: 0.0255 - mse: 0.0012 - val_loss: 3.5802e-04 - val_mae: 0.0154 - val_mse: 3.5802e-04 - learning_rate: 0.0010
Epoch 3/20
[1m10623/10623[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0

In [13]:
# Evaluate models
print("Evaluating models...")

# Pickup evaluation
pickup_test_loss, pickup_test_mae, pickup_test_mse = pickup_model.evaluate(X_test_pickup, y_test_pickup, verbose=0)
print(f"\nPickup Model Test Results:")
print(f"  Loss (MSE): {pickup_test_loss:.6f}")
print(f"  MAE: {pickup_test_mae:.6f}")
print(f"  MSE: {pickup_test_mse:.6f}")

# Delivery evaluation
delivery_test_loss, delivery_test_mae, delivery_test_mse = delivery_model.evaluate(X_test_delivery, y_test_delivery, verbose=0)
print(f"\nDelivery Model Test Results:")
print(f"  Loss (MSE): {delivery_test_loss:.6f}")
print(f"  MAE: {delivery_test_mae:.6f}")
print(f"  MSE: {delivery_test_mse:.6f}")

# Make predictions
y_pred_pickup = pickup_model.predict(X_test_pickup)
y_pred_delivery = delivery_model.predict(X_test_delivery)

print(f"\nPickup predictions shape: {y_pred_pickup.shape}")
print(f"Delivery predictions shape: {y_pred_delivery.shape}")

Evaluating models...

Pickup Model Test Results:
  Loss (MSE): 0.000174
  MAE: 0.011246
  MSE: 0.000174

Pickup Model Test Results:
  Loss (MSE): 0.000174
  MAE: 0.011246
  MSE: 0.000174

Delivery Model Test Results:
  Loss (MSE): 0.000163
  MAE: 0.007679
  MSE: 0.000163
[1m   1/3320[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m3:19[0m 60ms/step
Delivery Model Test Results:
  Loss (MSE): 0.000163
  MAE: 0.007679
  MSE: 0.000163
[1m3320/3320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 457us/step
[1m3320/3320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 457us/step
[1m2953/2953[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 543us/step
[1m2953/2953[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 543us/step

Pickup predictions shape: (106223, 1)
Delivery predictions shape: (94484, 1)

Pickup predictions shape: (106223, 1)
Delivery predictions shape: (94484, 1)


In [14]:
# Generate route optimization recommendations
def generate_route_recommendations(df, predictions, original_data):
    """Generate route optimization recommendations based on predictions"""
    recommendations = []
    
    for idx, (pred, score) in enumerate(zip(predictions, original_data['efficiency_score'].values)):
        pred_score = pred[0]
        
        if pred_score < score * 0.8:  # Significant improvement possible
            action = 'Optimize'
            priority = 'High'
        elif pred_score < score:
            action = 'Review'
            priority = 'Medium'
        else:
            action = 'Maintain'
            priority = 'Low'
        
        recommendations.append({
            'action': action,
            'priority': priority,
            'current_efficiency': float(score),
            'predicted_efficiency': float(pred_score),
            'improvement_potential': float(score - pred_score)
        })
    
    return pd.DataFrame(recommendations)

print("Generating route optimization recommendations...")
pickup_recommendations = generate_route_recommendations(pickup_processed, y_pred_pickup, pickup_processed)
delivery_recommendations = generate_route_recommendations(delivery_processed, y_pred_delivery, delivery_processed)

print(f"\nPickup Recommendations Summary:")
print(pickup_recommendations['action'].value_counts())
print(f"\nDelivery Recommendations Summary:")
print(delivery_recommendations['action'].value_counts())

print(f"\nPickup Sample Recommendations:")
print(pickup_recommendations.head(10))

print(f"\nDelivery Sample Recommendations:")
print(delivery_recommendations.head(10))

Generating route optimization recommendations...

Pickup Recommendations Summary:
action
Optimize    52913
Maintain    45391
Review       7919
Name: count, dtype: int64

Delivery Recommendations Summary:
action
Maintain    49389
Optimize    32612
Review      12483
Name: count, dtype: int64

Pickup Sample Recommendations:
     action priority  current_efficiency  predicted_efficiency  \
0  Maintain      Low            0.022024              0.076469   
1  Optimize     High            0.015605              0.006637   
2  Optimize     High            0.277097              0.060153   
3  Maintain      Low            0.024134              0.333979   
4  Maintain      Low            0.064055              0.304847   
5  Maintain      Low            0.018414              0.342364   
6  Optimize     High            0.023506              0.005243   
7  Maintain      Low            0.318400              0.560758   
8  Optimize     High            0.477149              0.033445   
9  Maintain      

In [15]:
# Save models and artifacts
print("Saving models and artifacts...")
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# Save TensorFlow models
pickup_model_path = os.path.join(OUTPUT_PATH, f'pickup_optimizer_model_{timestamp}.h5')
delivery_model_path = os.path.join(OUTPUT_PATH, f'delivery_optimizer_model_{timestamp}.h5')

pickup_model.save(pickup_model_path)
delivery_model.save(delivery_model_path)

print(f"Pickup model saved: {pickup_model_path}")
print(f"Delivery model saved: {delivery_model_path}")

# Save scalers and encoders
scaler_artifacts = {
    'pickup_scaler': scaler_pickup,
    'delivery_scaler': scaler_delivery,
    'pickup_encoders': encoders_pickup,
    'delivery_encoders': encoders_delivery
}

artifacts_path = os.path.join(OUTPUT_PATH, f'preprocessing_artifacts_{timestamp}.pkl')
joblib.dump(scaler_artifacts, artifacts_path)
print(f"Preprocessing artifacts saved: {artifacts_path}")



Saving models and artifacts...




Pickup model saved: /home/sirin/BIGDATA/quick-result/pickup_optimizer_model_20251120_163355.h5
Delivery model saved: /home/sirin/BIGDATA/quick-result/delivery_optimizer_model_20251120_163355.h5
Preprocessing artifacts saved: /home/sirin/BIGDATA/quick-result/preprocessing_artifacts_20251120_163355.pkl


In [17]:
# Export results to CSV
print("Exporting results to CSV...")

# Pickup results
pickup_results = pickup_processed[['order_id', 'from_city_name', 'poi_lng', 'poi_lat', 'efficiency_score']].copy()
pickup_results_with_pred = pickup_results.iloc[len(pickup_results) - len(y_pred_pickup):].copy()
pickup_results_with_pred['predicted_efficiency'] = y_pred_pickup.flatten()
pickup_results_with_pred = pickup_results_with_pred.reset_index(drop=True)
pickup_results_with_pred['action'] = pickup_recommendations['action'].values[:len(pickup_results_with_pred)]
pickup_results_with_pred['priority'] = pickup_recommendations['priority'].values[:len(pickup_results_with_pred)]
pickup_results_with_pred['improvement_potential'] = pickup_recommendations['improvement_potential'].values[:len(pickup_results_with_pred)]

pickup_export = os.path.join(OUTPUT_PATH, f'pickup_optimization_results_{timestamp}.csv')
pickup_results_with_pred.to_csv(pickup_export, index=False)
print(f"Pickup results exported: {pickup_export}")

# Delivery results
delivery_results = delivery_processed[['order_id', 'from_city_name', 'poi_lng', 'poi_lat', 'efficiency_score']].copy()
delivery_results_with_pred = delivery_results.iloc[len(delivery_results) - len(y_pred_delivery):].copy()
delivery_results_with_pred['predicted_efficiency'] = y_pred_delivery.flatten()
delivery_results_with_pred = delivery_results_with_pred.reset_index(drop=True)
delivery_results_with_pred['action'] = delivery_recommendations['action'].values[:len(delivery_results_with_pred)]
delivery_results_with_pred['priority'] = delivery_recommendations['priority'].values[:len(delivery_results_with_pred)]
delivery_results_with_pred['improvement_potential'] = delivery_recommendations['improvement_potential'].values[:len(delivery_results_with_pred)]

delivery_export = os.path.join(OUTPUT_PATH, f'delivery_optimization_results_{timestamp}.csv')
delivery_results_with_pred.to_csv(delivery_export, index=False)
print(f"Delivery results exported: {delivery_export}")

Exporting results to CSV...
Pickup results exported: /home/sirin/BIGDATA/quick-result/pickup_optimization_results_20251120_163355.csv
Pickup results exported: /home/sirin/BIGDATA/quick-result/pickup_optimization_results_20251120_163355.csv
Delivery results exported: /home/sirin/BIGDATA/quick-result/delivery_optimization_results_20251120_163355.csv
Delivery results exported: /home/sirin/BIGDATA/quick-result/delivery_optimization_results_20251120_163355.csv


In [18]:
# Export recommendations summary
print("Exporting recommendations summary...")

recommendations_summary = os.path.join(OUTPUT_PATH, f'recommendations_summary_{timestamp}.csv')
combined_recommendations = pd.concat([
    pickup_recommendations.assign(type='pickup'),
    delivery_recommendations.assign(type='delivery')
], ignore_index=True)

combined_recommendations.to_csv(recommendations_summary, index=False)
print(f"Recommendations summary exported: {recommendations_summary}")

Exporting recommendations summary...
Recommendations summary exported: /home/sirin/BIGDATA/quick-result/recommendations_summary_20251120_163355.csv
Recommendations summary exported: /home/sirin/BIGDATA/quick-result/recommendations_summary_20251120_163355.csv


In [21]:
# Generate comprehensive metrics report
print("Generating comprehensive metrics report...")

# Calculate improvement potential only on test sets
pickup_test_improvement = float((y_test_pickup - y_pred_pickup.flatten()).mean())
delivery_test_improvement = float((y_test_delivery - y_pred_delivery.flatten()).mean())

metrics_report = {
    'timestamp': timestamp,
    'data_summary': {
        'pickup_total_orders': len(pickup_processed),
        'delivery_total_orders': len(delivery_processed),
        'pickup_test_orders': len(y_test_pickup),
        'delivery_test_orders': len(y_test_delivery),
        'pickup_cities': pickup_processed['from_city_name'].nunique(),
        'delivery_cities': delivery_processed['from_city_name'].nunique()
    },
    'pickup_model': {
        'test_loss': float(pickup_test_loss),
        'test_mae': float(pickup_test_mae),
        'test_mse': float(pickup_test_mse),
        'avg_efficiency_score': float(pickup_processed['efficiency_score'].mean()),
        'predicted_avg_efficiency': float(y_pred_pickup.mean())
    },
    'delivery_model': {
        'test_loss': float(delivery_test_loss),
        'test_mae': float(delivery_test_mae),
        'test_mse': float(delivery_test_mse),
        'avg_efficiency_score': float(delivery_processed['efficiency_score'].mean()),
        'predicted_avg_efficiency': float(y_pred_delivery.mean())
    },
    'optimization_impact': {
        'pickup_test_improvement_potential': pickup_test_improvement,
        'delivery_test_improvement_potential': delivery_test_improvement,
        'high_priority_pickup_routes': int((pickup_recommendations['priority'] == 'High').sum()),
        'high_priority_delivery_routes': int((delivery_recommendations['priority'] == 'High').sum())
    },
    'files_exported': {
        'pickup_model': pickup_model_path,
        'delivery_model': delivery_model_path,
        'artifacts': artifacts_path,
        'pickup_results': pickup_export,
        'delivery_results': delivery_export,
        'recommendations': recommendations_summary
    }
}

# Save metrics report
metrics_path = os.path.join(OUTPUT_PATH, f'optimization_metrics_{timestamp}.json')
with open(metrics_path, 'w') as f:
    json.dump(metrics_report, f, indent=2)

print(f"Metrics report saved: {metrics_path}")
print("\n=== OPTIMIZATION METRICS ===")
print(json.dumps(metrics_report, indent=2))

Generating comprehensive metrics report...
Metrics report saved: /home/sirin/BIGDATA/quick-result/optimization_metrics_20251120_163355.json

=== OPTIMIZATION METRICS ===
{
  "timestamp": "20251120_163355",
  "data_summary": {
    "pickup_total_orders": 531115,
    "delivery_total_orders": 472419,
    "pickup_test_orders": 106223,
    "delivery_test_orders": 94484,
    "pickup_cities": 5,
    "delivery_cities": 3
  },
  "pickup_model": {
    "test_loss": 0.00017407411360181868,
    "test_mae": 0.011246351525187492,
    "test_mse": 0.00017407411360181868,
    "avg_efficiency_score": 0.20702911406238345,
    "predicted_avg_efficiency": 0.2008327841758728
  },
  "delivery_model": {
    "test_loss": 0.00016298131959047168,
    "test_mae": 0.007678888738155365,
    "test_mse": 0.00016298131959047168,
    "avg_efficiency_score": 0.37366863229853275,
    "predicted_avg_efficiency": 0.38024434447288513
  },
  "optimization_impact": {
    "pickup_test_improvement_potential": 0.006089042872190475

In [20]:
# Final summary and verification
print("\n" + "="*60)
print("DEEP LEARNING ROUTE OPTIMIZATION - SUMMARY")
print("="*60)

print("\n✓ Data Loaded:")
print(f"  - Pickup orders: {len(pickup_processed):,}")
print(f"  - Delivery orders: {len(delivery_processed):,}")

print("\n✓ Models Trained:")
print(f"  - Pickup model layers: {len(pickup_model.layers)}")
print(f"  - Delivery model layers: {len(delivery_model.layers)}")

print("\n✓ Model Performance:")
print(f"  - Pickup MAE: {pickup_test_mae:.6f}")
print(f"  - Delivery MAE: {delivery_test_mae:.6f}")

print("\n✓ Optimization Opportunities:")
print(f"  - High priority pickup routes: {int((pickup_recommendations['priority'] == 'High').sum()):,}")
print(f"  - High priority delivery routes: {int((delivery_recommendations['priority'] == 'High').sum()):,}")

print("\n✓ Output Files Saved to: /home/sirin/BIGDATA/quick-result/")
output_files = sorted(os.listdir(OUTPUT_PATH))
for i, file in enumerate(output_files[-10:], 1):  # Show last 10 files
    print(f"  {i}. {file}")

print("\n" + "="*60)
print("Optimization Complete!")
print("="*60)


DEEP LEARNING ROUTE OPTIMIZATION - SUMMARY

✓ Data Loaded:
  - Pickup orders: 531,115
  - Delivery orders: 472,419

✓ Models Trained:
  - Pickup model layers: 13
  - Delivery model layers: 13

✓ Model Performance:
  - Pickup MAE: 0.011246
  - Delivery MAE: 0.007679

✓ Optimization Opportunities:
  - High priority pickup routes: 52,913
  - High priority delivery routes: 32,612

✓ Output Files Saved to: /home/sirin/BIGDATA/quick-result/
  1. delivery_optimization_results_20251120_163355.csv
  2. delivery_optimizer_model_20251120_163355.h5
  3. pickup_optimization_results_20251120_163355.csv
  4. pickup_optimizer_model_20251120_163355.h5
  5. preprocessing_artifacts_20251120_163355.pkl
  6. recommendations_summary_20251120_163355.csv

Optimization Complete!
