In [1]:
from utils.pkl_preprocessor import PickleBatchLoader
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import pandas as pd

# import pickle
import seaborn as sns
import matplotlib.pyplot as plt
import time
from collections import defaultdict
from utils.cnn import (
    train_cnn_with_batch_loader,
    clear_gpu_memory,
)

2025-08-21 11:59:19.437448: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-21 11:59:19.476055: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


⚙️ Configuring GPU and precision...
   GPU configured: ['/physical_device:GPU:0']
   Mixed precision: float16 enabled


2025-08-21 11:59:20.486167: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
def train_cnn_incrementally(batch_loader):
    """
    Train the CNN on the data served by ``batch_loader`` and return its score.

    Thin wrapper around ``train_cnn_with_batch_loader`` that pins the
    hyperparameters used for the model comparison and always releases GPU
    memory afterwards, so it can be timed and called like the other
    ``train_*_incrementally`` functions.

    Args:
        batch_loader: Loader object handed unchanged to
            ``train_cnn_with_batch_loader``.

    Returns:
        Whatever ``train_cnn_with_batch_loader`` returns; the caller records
        it as the CNN accuracy (presumably a percentage like the other
        models - TODO confirm against ``utils.cnn``).

    Raises:
        Any exception raised by ``train_cnn_with_batch_loader`` or
        ``clear_gpu_memory`` propagates to the caller.
    """
    print("--- Starting CNN Training ---")

    try:
        # Train CNN with the same batch loader the other models use.
        return train_cnn_with_batch_loader(
            batch_loader,
            epochs=30,  # Reduced epochs for faster comparison
            batch_size=128,  # Smaller batch size for memory efficiency
            patience=8,  # Early stopping patience
            learning_rate=0.001,
        )
    finally:
        # Release GPU memory even when training fails: the driver loop catches
        # the exception and moves on to the next delta, which would otherwise
        # start with the failed run's allocations still in place.
        clear_gpu_memory()


In [3]:
def train_xgboost_incrementally(batch_loader):
    """
    Fit a single XGBoost classifier across all training batches and score it.

    Every batch continues boosting from the booster produced by the previous
    batch (handed to ``fit`` through ``xgb_model``); the first batch starts
    from scratch because no booster exists yet.

    Args:
        batch_loader: Object providing ``batch_generator()`` (yielding
            ``(X_batch, y_batch)`` pairs), ``len()`` and ``get_test_set()``.

    Returns:
        Accuracy on the loader's hold-out test set, as a percentage.
    """
    # One classifier instance is reused for every batch.
    classifier = xgb.XGBClassifier(
        objective="binary:logistic",
        max_depth=6,
        learning_rate=0.1,
        n_estimators=100,
        random_state=42,
    )

    # Booster carried over from the previous batch; None means "start fresh".
    previous_booster = None

    print("--- Starting Incremental XGBoost Training ---")

    for i, (features, labels) in enumerate(batch_loader.batch_generator()):
        print(f"  Training XGBoost on batch {i + 1}/{len(batch_loader)}...")
        classifier.fit(features, labels, xgb_model=previous_booster)
        # Keep the freshly trained booster so the next batch builds on it.
        previous_booster = classifier.get_booster()

    # Score the final model on the hold-out test set.
    print("  Evaluating final XGBoost model...")
    test_features, test_labels = batch_loader.get_test_set()
    return accuracy_score(test_labels, classifier.predict(test_features)) * 100


In [4]:
def train_random_forest_incrementally(batch_loader):
    """
    Grow a warm-started Random Forest by five trees per batch, then score it.

    With ``warm_start=True`` each ``fit`` call keeps the trees that already
    exist and only builds the additional ones needed to reach
    ``n_estimators``, so every batch contributes five new trees fitted on
    that batch alone.

    Args:
        batch_loader: Object providing ``batch_generator()`` (yielding
            ``(X_batch, y_batch)`` pairs), ``len()`` and ``get_test_set()``.

    Returns:
        Accuracy on the loader's hold-out test set, as a percentage.
    """
    trees_per_batch = 5

    # warm_start=True is key for incremental additions
    rf_model = RandomForestClassifier(
        n_estimators=trees_per_batch, random_state=42, warm_start=True
    )
    print("--- Starting Incremental Random Forest Training ---")

    # Training loop
    for i, (X_batch, y_batch) in enumerate(batch_loader.batch_generator()):
        print(f"  Training Random Forest on batch {i + 1}/{len(batch_loader)}...")
        if i > 0:
            # Raise the tree budget right before fitting a follow-up batch
            # rather than after every fit, so that once the loop ends
            # n_estimators equals the number of trees actually built.
            rf_model.n_estimators += trees_per_batch
        rf_model.fit(X_batch, y_batch)

    # Evaluation on the hold-out test set
    print("  Evaluating final Random Forest model...")
    X_test, y_test = batch_loader.get_test_set()
    predictions = rf_model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    return accuracy * 100


In [5]:
def train_gradient_boosting_incrementally(batch_loader):
    """
    Grow a warm-started Gradient Boosting model by five stages per batch, then score it.

    With ``warm_start=True`` each ``fit`` call keeps the boosting stages that
    already exist and only adds the ones needed to reach ``n_estimators``,
    so every batch contributes five new stages fitted on that batch.

    Args:
        batch_loader: Object providing ``batch_generator()`` (yielding
            ``(X_batch, y_batch)`` pairs), ``len()`` and ``get_test_set()``.

    Returns:
        Accuracy on the loader's hold-out test set, as a percentage.
    """
    stages_per_batch = 5

    # warm_start=True is key for incremental additions
    gb_model = GradientBoostingClassifier(
        n_estimators=stages_per_batch,
        learning_rate=0.1,
        max_depth=6,
        random_state=42,
        warm_start=True,
    )
    print("--- Starting Incremental Gradient Boosting Training ---")

    # Training loop
    for i, (X_batch, y_batch) in enumerate(batch_loader.batch_generator()):
        print(f"  Training Gradient Boosting on batch {i + 1}/{len(batch_loader)}...")
        if i > 0:
            # Raise the stage budget right before fitting a follow-up batch
            # rather than after every fit, so that once the loop ends
            # n_estimators equals the number of stages actually built.
            gb_model.n_estimators += stages_per_batch
        gb_model.fit(X_batch, y_batch)

    # Evaluation on the hold-out test set
    print("  Evaluating final Gradient Boosting model...")
    X_test, y_test = batch_loader.get_test_set()
    predictions = gb_model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    return accuracy * 100


In [6]:
def time_algorithm(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` and measure how long the call takes.

    Uses ``time.perf_counter()``, a monotonic high-resolution clock, so the
    measured interval cannot be distorted (or turn negative) when the system
    wall clock is adjusted while a long training run is in progress.

    Args:
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        tuple: ``(result, execution_time)`` where ``result`` is whatever
        ``func`` returned and ``execution_time`` is the elapsed time in
        seconds as a float.

    Raises:
        Any exception raised by ``func`` propagates unchanged; no timing is
        returned in that case.
    """
    start_time = time.perf_counter()
    result = func(*args, **kwargs)
    execution_time = time.perf_counter() - start_time
    return result, execution_time


# Configuration


In [7]:
# Experiment selection: which round's dataset files to load and which deltas
# to evaluate (both are interpolated into the pickle file path).
TEST_ROUND = 4
TEST_DELTAS = [105, 106, 128]

# Batch size handed to PickleBatchLoader for every dataset.
BATCH_SIZE = 100_096

print(
    f"\n{'=' * 60}",
    f"Testing Round {TEST_ROUND} of SM4 with Deltas: {TEST_DELTAS}",
    f"{'=' * 60}\n",
    sep="\n",
)



Testing Round 4 of SM4 with Deltas: [105, 106, 128]



In [8]:
# Column-oriented accumulator for the comparison: one "delta" column plus an
# "<model>_accuracy" and "<model>_time" column per model. Accuracy columns
# come first, then time columns, each group in the same model order.
results_data = {
    "delta": [],
    **{
        f"{model}_{metric}": []
        for metric in ("accuracy", "time")
        for model in ("xgboost", "random_forest", "gradient_boosting", "cnn")
    },
}


In [9]:
# Every model in the comparison as (results_data key prefix, display label,
# training function), in the order they are trained and reported.
ALGORITHMS = [
    ("xgboost", "XGBoost", train_xgboost_incrementally),
    ("random_forest", "Random Forest", train_random_forest_incrementally),
    ("gradient_boosting", "Gradient Boosting", train_gradient_boosting_incrementally),
    ("cnn", "CNN", train_cnn_incrementally),
]

for delta in TEST_DELTAS:
    pickle_file = f"dataset_pkl_round_{TEST_ROUND}/SM4_{TEST_ROUND}_round_delta-{delta}_combined.pkl"

    print(f"\n{'=' * 50}")
    print(f"Processing Delta {delta} from file: {pickle_file}")
    print(f"{'=' * 50}\n")

    try:
        batch_loader = PickleBatchLoader(pickle_file, batch_size=BATCH_SIZE)

        # Train and time every model on the same loader.
        run_results = {}
        for key, label, train_func in ALGORITHMS:
            print(f"Training {label}...")
            accuracy, elapsed = time_algorithm(train_func, batch_loader)
            run_results[key] = {"label": label, "accuracy": accuracy, "time": elapsed}

        # Record the row only once every model has finished, so all columns
        # of results_data keep the same length even when a model fails.
        results_data["delta"].append(delta)
        for key, outcome in run_results.items():
            results_data[f"{key}_accuracy"].append(outcome["accuracy"])
            results_data[f"{key}_time"].append(outcome["time"])

        print(f"\nDelta {delta} Results:")
        for outcome in run_results.values():
            print(
                f"{outcome['label']} - Accuracy: {outcome['accuracy']:.4f}%, "
                f"Time: {outcome['time']:.2f}s"
            )

    except FileNotFoundError:
        print(f"File not found: {pickle_file}. Skipping delta {delta}.")
    except Exception as e:
        # Best-effort: keep going with the remaining deltas, but report the
        # exception type too - messages such as a bare KeyError are otherwise
        # impossible to interpret from the log.
        print(f"An error occurred during delta {delta}: {type(e).__name__}: {e}")



Processing Delta 105 from file: dataset_pkl_round_4/SM4_4_round_delta-105_combined.pkl

Loading data from dataset_pkl_round_4/SM4_4_round_delta-105_combined.pkl...
Preprocessing hold-out test set...
Loader initialized. Ready to generate batches.
Training XGBoost...
--- Starting Incremental XGBoost Training ---
  Training XGBoost on batch 1/2...
  Training XGBoost on batch 2/2...
  Evaluating final XGBoost model...
Training Random Forest...
--- Starting Incremental Random Forest Training ---
  Training Random Forest on batch 1/2...
  Training Random Forest on batch 2/2...
  Evaluating final Random Forest model...
Training Gradient Boosting...
--- Starting Incremental Gradient Boosting Training ---
  Training Gradient Boosting on batch 1/2...
  Training Gradient Boosting on batch 2/2...
  Evaluating final Gradient Boosting model...
Training CNN...
--- Starting CNN Training ---
🚀 Training CNN with batch loader...
📂 Loading data from batch loader for CNN...
   Collecting batches...


I0000 00:00:1755757787.941211  112205 gpu_process_state.cc:208] Using CUDA malloc Async allocator for GPU: 0
I0000 00:00:1755757787.944166  112205 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4140 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


✅ Optimized CNN Model created!
   Total parameters: 1,651,233
   Estimated memory usage: ~3.1 MB
   Starting training with batch_size=128, epochs=30
Epoch 1/30


2025-08-21 11:59:52.623697: I external/local_xla/xla/service/service.cc:163] XLA service 0x7f2e800165c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-08-21 11:59:52.623717: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
2025-08-21 11:59:52.730314: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-08-21 11:59:54.042685: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91200
2025-08-21 11:59:54.617953: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-08-21 11:59:54.

[1m  17/1001[0m [37m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [1m10s[0m 10ms/step - accuracy: 0.4922 - loss: 1.2273

I0000 00:00:1755757804.484066  112598 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m 998/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ[0m [1m0s[0m 10ms/step - accuracy: 0.4997 - loss: 0.9363

2025-08-21 12:00:15.827018: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-08-21 12:00:15.827187: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.



[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.4997 - loss: 0.9362


2025-08-21 12:00:27.885966: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.


[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m41s[0m 25ms/step - accuracy: 0.5033 - loss: 0.8973 - val_accuracy: 0.5050 - val_loss: 0.8662 - learning_rate: 0.0010
Epoch 2/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.5199 - loss: 0.8352 - val_accuracy: 0.5163 - val_loss: 0.8047 - learning_rate: 0.0010
Epoch 3/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.5482 - loss: 0.7822 - val_accuracy: 0.5017 - val_loss: 0.7881 - learning_rate: 0.0010
Epoch 4/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m11s[0m 11ms/step - accuracy: 0.5856 - loss: 0.7533 - val_accuracy: 0.5150 - val_loss: 0.7792 - learning_rate: 0.0010
Epoch 5/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ

2025-08-21 12:05:12.605935: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-08-21 12:05:12.606128: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.

2025-08-21 12:05:15.003339: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:361] gpu_async_0 cuMemAllocAsync failed to allocate 10842275840 bytes: RESOURCE_EXHAUSTED: : CUDA_ERROR_OUT_OF_MEMORY: out of memory
 Reported by CUDA: Free memory/Total memory: 1575288832/6086262784
2025-08-21 12:05:15.003357: E external/local_xla/xla/

[1m 999/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ[0m [1m0s[0m 17ms/step - accuracy: 0.5063 - loss: 0.9319

2025-08-21 12:05:43.490946: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-08-21 12:05:43.491103: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.


2025-08-21 12:05:46.243473: E external/local_xla/xla/stream_executor/gpu/gpu_cudamallocasync_allocator.cc:361] gpu_async_0 cuMemAllocAsync failed to allocate 10334830592 bytes: RESOURCE_EXHAUSTED: : CUDA_ERROR_OUT_OF_MEMORY: out of memory
 Reported by CUDA: Free memory/Total memory: 1573191680/6086262784
2025-08-21 12:05:46.243502: E external/local_xla/xla

[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.5063 - loss: 0.9318

2025-08-21 12:06:01.881918: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.


[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m58s[0m 40ms/step - accuracy: 0.5115 - loss: 0.8939 - val_accuracy: 0.5003 - val_loss: 0.8642 - learning_rate: 0.0010
Epoch 2/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m19s[0m 19ms/step - accuracy: 0.5433 - loss: 0.8309 - val_accuracy: 0.5174 - val_loss: 0.8211 - learning_rate: 0.0010
Epoch 3/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m19s[0m 19ms/step - accuracy: 0.5791 - loss: 0.7808 - val_accuracy: 0.5212 - val_loss: 0.8112 - learning_rate: 0.0010
Epoch 4/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m18s[0m 18ms/step - accuracy: 0.6022 - loss: 0.7487 - val_accuracy: 0.5020 - val_loss: 0.8889 - learning_rate: 0.0010
Epoch 5/30
[1m1001/1001[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ

In [10]:
# Turn the accumulated columns into a table with one row per delta.
results_df = pd.DataFrame(results_data).set_index("delta")

print(
    f"\n{'=' * 60}",
    "Final Results DataFrame:",
    f"{'=' * 60}",
    results_df,
    sep="\n",
)



Final Results DataFrame:
       xgboost_accuracy  random_forest_accuracy  gradient_boosting_accuracy  \
delta                                                                         
105           99.947551               81.345688                   95.134744   
106           99.940058               77.082345                   92.799520   
128           99.955044               80.389121                   95.991408   

       cnn_accuracy  xgboost_time  random_forest_time  gradient_boosting_time  \
delta                                                                           
105       88.233274      2.453444            2.480797               20.820648   
106       89.204210      2.679013            2.107248               18.146357   
128       89.613187      5.671495            2.825794               23.377944   

         cnn_time  
delta              
105    294.020774  
106    463.251171  
128    617.806511  


In [11]:
# Persist the results table twice: as a pickle and as a CSV.
results_pickle_path = f"model_comparison_results_round_{TEST_ROUND}_with_CNN.pkl"
results_csv_path = f"model_comparison_results_round_{TEST_ROUND}_with_CNN.csv"

results_df.to_pickle(results_pickle_path)
results_df.to_csv(results_csv_path)

print(
    f"\nResults saved as 'model_comparison_results_round_{TEST_ROUND}_with_CNN.pkl' and .csv"
)



Results saved as 'model_comparison_results_round_4_with_CNN.pkl' and .csv


In [12]:
# Reload the results table from the pickle written by the save cell.
saved_results_path = f"model_comparison_results_round_{TEST_ROUND}_with_CNN.pkl"
results_df = pd.read_pickle(saved_results_path)

In [13]:
# Mean training time per model over all deltas present in results_df,
# keyed by the model's display name.
avg_times = {
    label: results_df[time_column].mean()
    for label, time_column in (
        ("XGBoost", "xgboost_time"),
        ("Random Forest", "random_forest_time"),
        ("Gradient Boosting", "gradient_boosting_time"),
        ("CNN", "cnn_time"),
    )
}

print(f"\nAverage Training Times:")
for algo, avg_time in avg_times.items():
    print(f"{algo}: {avg_time:.2f}s")



Average Training Times:
XGBoost: 3.60s
Random Forest: 2.47s
Gradient Boosting: 20.78s
CNN: 458.36s


In [14]:
# Plot styling. Order matters: the matplotlib style is applied first and the
# seaborn settings are layered on top of it.
plt.style.use("default")
# Use seaborn's "husl" palette as the colour cycle.
sns.set_palette("husl")
# "notebook" plotting context with font elements scaled by 1.2.
sns.set_context("notebook", font_scale=1.2)


In [15]:
# Create visualizations
# NOTE(review): this only allocates an empty 16x8 inch figure; no axes are
# added or plotted in this cell (the recorded output shows "0 Axes").
# TODO: add the intended plots here or drop the cell.
fig = plt.figure(figsize=(16, 8))


<Figure size 1600x800 with 0 Axes>

In [16]:
print(f"\n{'=' * 80}")
print("Detailed Comparison Summary")
print(f"{'=' * 80}")

# Accuracy column of results_df for each model, keyed by the same display
# names that avg_times uses. Best/worst values are taken over whatever rows
# results_df actually contains instead of indexing by every entry of
# TEST_DELTAS: a delta that the training loop skipped (missing file or
# failed run) has no row, and results_df.loc[delta, ...] would raise KeyError.
accuracy_columns = {
    "XGBoost": results_df["xgboost_accuracy"],
    "Random Forest": results_df["random_forest_accuracy"],
    "Gradient Boosting": results_df["gradient_boosting_accuracy"],
    "CNN": results_df["cnn_accuracy"],
}

comparison_summary = pd.DataFrame(
    {
        "Algorithm": list(accuracy_columns),
        "Avg Time (s)": [avg_times[name] for name in accuracy_columns],
        "Best Accuracy (%)": [column.max() for column in accuracy_columns.values()],
        "Worst Accuracy (%)": [column.min() for column in accuracy_columns.values()],
    }
)

print(comparison_summary.to_string(index=False))

# (algorithm name, best accuracy) pairs, in the same model order as above;
# used to rank the models by accuracy.
best_accuracies = [(name, column.max()) for name, column in accuracy_columns.items()]



Detailed Comparison Summary
        Algorithm  Avg Time (s)  Best Accuracy (%)  Worst Accuracy (%)
          XGBoost      3.601317          99.955044           99.940058
    Random Forest      2.471280          81.345688           77.082345
Gradient Boosting     20.781650          95.991408           92.799520
              CNN    458.359485          89.613187           88.233274


In [17]:
# Rank the models twice: by mean training time and by best accuracy.
print(f"\n{'=' * 60}", "Performance Rankings", f"{'=' * 60}", sep="\n")

# Ascending by average time: the fastest model comes first.
print("Fastest to Slowest (by average time):")
sorted_by_time = sorted(avg_times.items(), key=lambda entry: entry[1])
for i, (algo, time_val) in enumerate(sorted_by_time, start=1):
    print(f"{i}. {algo}: {time_val:.2f}s")

# Descending by best accuracy: the most accurate model comes first.
print("\nBest to Worst (by best accuracy achieved):")
sorted_by_accuracy = sorted(
    best_accuracies, key=lambda entry: entry[1], reverse=True
)
for i, (algo, acc) in enumerate(sorted_by_accuracy, start=1):
    print(f"{i}. {algo}: {acc:.4f}%")



Performance Rankings
Fastest to Slowest (by average time):
1. Random Forest: 2.47s
2. XGBoost: 3.60s
3. Gradient Boosting: 20.78s
4. CNN: 458.36s

Best to Worst (by best accuracy achieved):
1. XGBoost: 99.9550%
2. Gradient Boosting: 95.9914%
3. CNN: 89.6132%
4. Random Forest: 81.3457%
