In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import time
import random
from sklearn.calibration import CalibratedClassifierCV
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Load CIFAR-10 as (images, labels) pairs for the train and test splits.
# This call fetches the dataset archive over the network, as the recorded
# cell output shows.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [None]:
# Rescale pixel values by 1/255 and flatten the label arrays to 1-D.
# NOTE: this cell rebinds X_train / y_train, so re-running it without first
# re-running the loading cell divides by 255 a second time and re-splits the
# already-reduced training set.
X_train = X_train / 255.0
X_test = X_test / 255.0
y_train, y_test = y_train.flatten(), y_test.flatten()

# Hold out 10% of the training data for validation (fixed seed => stable split).
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=42,
)
def extract_basic_features(images):
    """Collapse every image to the mean of its values over axes 1 and 2.

    Args:
        images: Array-like object exposing ``.mean(axis=...)`` with at least
            three dimensions. For a batch laid out as
            (n, height, width, channels) the result is one mean per channel
            per image, i.e. shape (n, channels).

    Returns:
        The array of means with axes 1 and 2 reduced away.
    """
    spatial_axes = (1, 2)
    return images.mean(axis=spatial_axes)

# Mean-over-axes features for each data split; these are the inputs the
# decision tree is trained and evaluated on.
X_train_basic, X_val_basic, X_test_basic = (
    extract_basic_features(split) for split in (X_train, X_val, X_test)
)

In [None]:

# Decision tree (depth capped at 15) wrapped in a sigmoid calibrator that is
# fitted with 5-fold cross-validation, so predict_proba yields calibrated
# confidences.
base_decision_tree = DecisionTreeClassifier(
    max_depth=15,
    min_samples_split=5,
    random_state=42,
)
decision_tree = CalibratedClassifierCV(
    estimator=base_decision_tree,
    method='sigmoid',
    cv=5,
).fit(X_train_basic, y_train)

# Score the calibrated tree on the held-out validation split.
val_preds_tree = decision_tree.predict(X_val_basic)
val_probs_tree = decision_tree.predict_proba(X_val_basic)
val_accuracy_tree = accuracy_score(y_val, val_preds_tree)

print(f"Decision Tree Validation Accuracy with Calibration: {val_accuracy_tree:.2f}")


Decision Tree Validation Accuracy with Calibration: 0.21


In [None]:
# CNN with five convolutional stages, trained on augmented images.

# Augmentation: small rotations and shifts plus horizontal flips.
data_gen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)


def _conv_block(filters, dropout_rate=None, **conv_kwargs):
    """Return the layers of one Conv -> BatchNorm -> MaxPool stage.

    A Dropout layer is appended when ``dropout_rate`` is given. Extra keyword
    arguments (e.g. ``input_shape``) are forwarded to the Conv2D layer.
    """
    stage = [
        tf.keras.layers.Conv2D(filters, (3, 3), activation='relu', padding='same', **conv_kwargs),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling2D((2, 2)),
    ]
    if dropout_rate is not None:
        stage.append(tf.keras.layers.Dropout(dropout_rate))
    return stage


cnn_model = tf.keras.Sequential([
    # Feature extractor: filter count grows 64 -> 512; dropout only on the deeper stages.
    *_conv_block(64, input_shape=(32, 32, 3)),
    *_conv_block(128),
    *_conv_block(256, dropout_rate=0.3),
    *_conv_block(512, dropout_rate=0.4),
    *_conv_block(512, dropout_rate=0.4),

    # Classification head: 10-way softmax.
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation='softmax'),
])

cnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0003),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on augmented batches; validation uses the un-augmented hold-out split.
cnn_model.fit(
    data_gen.flow(X_train, y_train, batch_size=64),
    epochs=15,
    validation_data=(X_val, y_val),
)


Epoch 1/15
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1161s[0m 2s/step - accuracy: 0.2567 - loss: 2.4066 - val_accuracy: 0.4660 - val_loss: 1.4986
Epoch 2/15
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1164s[0m 2s/step - accuracy: 0.4315 - loss: 1.5688 - val_accuracy: 0.5548 - val_loss: 1.2047
Epoch 3/15
[1m704/704[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1148s[0m 2s/step - accuracy: 0.5049 - loss: 1.3735 - val_accuracy: 0.5486 - val_loss: 1.2972
Epoch 4/15
[1m 81/704[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m16:31[0m 2s/step - accuracy: 0.5687 - loss: 1.2198

In [None]:
def hybrid_predict_optimized(X, basic_features, decision_tree, cnn_model, batch_size=32, confidence_threshold=0.7):
    """Predict with the decision tree and defer low-confidence samples to the CNN.

    The tree's ``predict_proba`` is evaluated once for all samples. A sample
    keeps the tree's arg-max class when its highest probability is at least
    ``confidence_threshold``; every other sample (including one whose
    confidence is NaN) is re-predicted by the CNN. Only the deferred samples
    are sent to the CNN, in chunks of at most ``batch_size``.

    Both models are treated as emitting class *indices* (arg-max over the
    probability columns); this assumes column ``i`` means class ``i`` for the
    tree as well as for the CNN.

    Args:
        X: Indexable array of CNN inputs (must support integer-array indexing).
        basic_features: Tree inputs, aligned row-for-row with ``X``.
        decision_tree: Object exposing ``predict_proba(features)``.
        cnn_model: Object exposing ``predict(batch)`` returning per-class scores.
        batch_size: Maximum number of deferred samples per CNN call.
        confidence_threshold: Minimum tree confidence required to skip the CNN.

    Returns:
        1-D integer array with one predicted class index per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``basic_features`` differ in length, or if
            ``batch_size`` is smaller than 1.
    """
    num_samples = len(X)
    if len(basic_features) != num_samples:
        raise ValueError(
            f"X and basic_features must have the same length "
            f"(got {num_samples} and {len(basic_features)})"
        )
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer (got {batch_size})")
    if num_samples == 0:
        return np.array([], dtype=int)

    decision_tree_probs = np.asarray(decision_tree.predict_proba(basic_features))
    # Start from the tree's answer everywhere; uncertain rows are overwritten below.
    hybrid_preds = np.argmax(decision_tree_probs, axis=1)
    decision_tree_confidences = np.max(decision_tree_probs, axis=1)

    # Negating ">=" (instead of testing "<") routes NaN confidences to the CNN.
    uncertain_indices = np.flatnonzero(~(decision_tree_confidences >= confidence_threshold))

    # Batch over the uncertain samples only, so each CNN call gets a full batch.
    for start in range(0, len(uncertain_indices), batch_size):
        chunk = uncertain_indices[start:start + batch_size]
        hybrid_preds[chunk] = np.argmax(cnn_model.predict(X[chunk]), axis=1)

    return hybrid_preds


In [None]:
def test_hybrid_model_on_subset(X, basic_features, decision_tree, cnn_model, sample_size=200, batch_size=32, confidence_threshold=0.7, seed=None):
    """Run the hybrid predictor on a random subset and report how often the CNN is needed.

    Args:
        X: Indexable array of CNN inputs.
        basic_features: Tree inputs, aligned row-for-row with ``X``.
        decision_tree: Object exposing ``predict_proba(features)``.
        cnn_model: Object exposing ``predict(batch)``.
        sample_size: Number of distinct rows to draw (without replacement).
        batch_size: Forwarded to ``hybrid_predict_optimized``.
        confidence_threshold: Forwarded to ``hybrid_predict_optimized``; also
            used here to count the samples the tree is not confident about.
        seed: Optional seed for a private RNG so the same subset can be drawn
            again. With the default ``None`` the module-level ``random``
            generator is used, exactly as before.

    Returns:
        Tuple ``(predictions, indices, cnn_predictions_count)``: the hybrid
        predictions for the subset, the list of sampled row indices, and the
        number of sampled rows whose tree confidence is below the threshold.

    Raises:
        ValueError: If ``sample_size`` is negative or larger than ``len(X)``.
    """
    population = len(X)
    if not 0 <= sample_size <= population:
        raise ValueError(
            f"sample_size must be between 0 and len(X)={population} (got {sample_size})"
        )

    # A private generator keeps a seeded draw from disturbing the global RNG state.
    sampler = random if seed is None else random.Random(seed)
    indices = sampler.sample(range(population), sample_size)
    X_subset = X[indices]
    basic_features_subset = basic_features[indices]

    # This tree pass only feeds the deferral count; hybrid_predict_optimized
    # recomputes the probabilities itself.
    decision_tree_probs = decision_tree.predict_proba(basic_features_subset)
    decision_tree_confidences = np.max(decision_tree_probs, axis=1)
    cnn_predictions_count = np.sum(decision_tree_confidences < confidence_threshold)

    predictions = hybrid_predict_optimized(
        X_subset,
        basic_features_subset,
        decision_tree,
        cnn_model,
        batch_size=batch_size,
        confidence_threshold=confidence_threshold,
    )

    return predictions, indices, cnn_predictions_count

def cnn_predict(X, cnn_model, batch_size=32):
    """Classify every row of ``X`` with the CNN, one slice of ``batch_size`` at a time.

    Args:
        X: Sliceable collection of model inputs.
        cnn_model: Object exposing ``predict(batch)`` that returns per-class scores.
        batch_size: Number of rows passed to each ``predict`` call.

    Returns:
        1-D array holding the arg-max class index for each row of ``X``.
    """
    total = len(X)
    # Lazily yields one array of class indices per slice, in input order.
    per_batch_labels = (
        np.argmax(cnn_model.predict(X[lo:min(lo + batch_size, total)]), axis=1)
        for lo in range(0, total, batch_size)
    )
    return np.array([label for batch_labels in per_batch_labels for label in batch_labels])

In [None]:

# Benchmark settings.
sample_size = 600
batch_size = 100
confidence_threshold = 0.6

# Seed the global RNG so the same test subset is drawn on every run and the
# reported accuracies and timings can be compared across runs.
random.seed(42)

# Hybrid pipeline. perf_counter is a monotonic, high-resolution clock meant for
# measuring short intervals.
# NOTE: this interval also covers the index sampling and an extra tree
# predict_proba pass done inside test_hybrid_model_on_subset, so it slightly
# overstates the cost of the hybrid predictor itself.
start_time = time.perf_counter()
hybrid_predictions, test_indices, cnn_predictions_count = test_hybrid_model_on_subset(
    X_test,
    X_test_basic,
    decision_tree,
    cnn_model,
    sample_size=sample_size,
    batch_size=batch_size,
    confidence_threshold=confidence_threshold,
)
hybrid_time = time.perf_counter() - start_time

# CNN-only baseline on exactly the same rows.
X_test_subset = X_test[test_indices]
start_time = time.perf_counter()
cnn_predictions = cnn_predict(X_test_subset, cnn_model, batch_size=batch_size)
cnn_time = time.perf_counter() - start_time

# Accuracy of both approaches against the true labels of the subset.
y_test_subset = y_test[test_indices]
hybrid_accuracy = accuracy_score(y_test_subset, hybrid_predictions)
cnn_accuracy = accuracy_score(y_test_subset, cnn_predictions)

# Number (and share) of samples answered by the tree alone, i.e. CNN calls avoided.
total_predictions_in_subset = len(X_test_subset)
overload_reduction = total_predictions_in_subset - cnn_predictions_count
overload_reduction_percentage = (overload_reduction / total_predictions_in_subset) * 100

print(f"Hybrid Model - Prediction Time: {hybrid_time:.4f} seconds, Accuracy: {hybrid_accuracy:.4f}")
print(f"CNN Model - Prediction Time: {cnn_time:.4f} seconds, Accuracy: {cnn_accuracy:.4f}")
print(f"Overload Reduction on CNN: {overload_reduction} predictions, {overload_reduction_percentage:.2f}% reduction")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Hybrid Model - Prediction Time: 3.1171 seconds, Accuracy: 0.7317
CNN Model - Prediction Time: 2.9926 seconds, Accuracy: 0.7333
Overl