In [None]:
# ============================================================
# Import required libraries for data processing, modeling,
# evaluation metrics, statistical tests, and reproducibility
# ============================================================
import os, time, random
import numpy as np
import tensorflow as tf
import pandas as pd
from scipy.stats import ttest_rel
from sklearn.metrics import precision_score, recall_score, f1_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout, Activation,Bidirectional 
from sklearn.metrics import f1_score, classification_report
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.datasets import mnist, fashion_mnist, cifar100
from scipy.stats import friedmanchisquare
from scipy.stats import wilcoxon
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D, Add, Input, Flatten, GlobalAveragePooling2D
from tensorflow.keras.models import Model



# ============================================================
# Dataset Loading and Preprocessing
# Datasets are loaded from standard Keras libraries to ensure
# accessibility and full reproducibility
# ============================================================

In [None]:
# -----------------------------
# MNIST and Fashion-MNIST dataset preprocessing
# - Normalization to [0,1]
# - Reshaping for CNN input
# - One-hot encoding of labels
# -----------------------------
# 1. MNIST

# Load the train/test split that ships with Keras.
(X_train_mnist, y_train_mnist), (X_test_mnist, y_test_mnist) = mnist.load_data()

# Convert to float32, divide by 255 and append a single channel axis
# so the arrays have shape (N, 28, 28, 1) as expected by the CNN.
X_train_mnist = (X_train_mnist.astype("float32") / 255.0).reshape(-1, 28, 28, 1)
X_test_mnist = (X_test_mnist.astype("float32") / 255.0).reshape(-1, 28, 28, 1)

# One-hot encode the labels over 10 classes.
y_train_mnist = to_categorical(y_train_mnist, num_classes=10)
y_test_mnist = to_categorical(y_test_mnist, num_classes=10)

In [None]:

# =============================
# 2. Fashion-MNIST
# =============================
# Load the train/test split that ships with Keras.
(X_train_fm, y_train_fm), (X_test_fm, y_test_fm) = fashion_mnist.load_data()

# Convert to float32, divide by 255 and append a single channel axis
# so the arrays have shape (N, 28, 28, 1).
X_train_fm = (X_train_fm.astype("float32") / 255.0).reshape(-1, 28, 28, 1)
X_test_fm = (X_test_fm.astype("float32") / 255.0).reshape(-1, 28, 28, 1)

# One-hot encode the labels over 10 classes.
y_train_fm = to_categorical(y_train_fm, num_classes=10)
y_test_fm = to_categorical(y_test_fm, num_classes=10)

In [None]:

# =============================
# 3. CIFAR-100
# =============================
# Load CIFAR-100 with the 100 fine-grained labels.
(X_train_c100, y_train_c100), (X_test_c100, y_test_c100) = cifar100.load_data(label_mode='fine')

# Convert images to float32 and divide by 255.
X_train_c100 = X_train_c100.astype("float32") / 255.0
X_test_c100  = X_test_c100.astype("float32") / 255.0

# One-hot encode BOTH splits. The test labels must use the same encoding as
# the training labels, because the CIFAR model is compiled with
# 'categorical_crossentropy' and the evaluation code takes argmax over the
# label axis.
y_train_c100 = to_categorical(y_train_c100, 100)
y_test_c100  = to_categorical(y_test_c100, 100)


# ============================================================
# Display dataset shapes for verification
# ============================================================
print("MNIST:", X_train_mnist.shape, y_train_mnist.shape)
print("Fashion-MNIST:", X_train_fm.shape, y_train_fm.shape)
print("CIFAR-100:", X_train_c100.shape, y_train_c100.shape)
#==================================================================

In [None]:
# ============================================================
# List all available built-in activation functions in TensorFlow
# ============================================================

# Every attribute name exposed by tf.keras.activations, minus dunder entries.
activation_functions = [
    name for name in dir(tf.keras.activations) if not name.startswith('__')
]
print(activation_functions)

In [None]:
# ============================================================
# Definition and registration of custom and baseline
# activation functions used in the experiments
# ============================================================
# Custom MishRelU activation function:
# identity for x > 0, Mish (x * tanh(softplus(x))) otherwise

def MishRelU(x):
    """Hybrid activation: identity for positive inputs, Mish elsewhere.

    Returns ``x`` where ``x > 0`` and ``x * tanh(softplus(x))`` otherwise.
    """
    mish_branch = x * tf.keras.activations.tanh(tf.keras.activations.softplus(x))
    return tf.where(x > 0, x, mish_branch)


# Register the function in Keras' custom-object table under its own name.
get_custom_objects().update({'MishRelU': MishRelU})


In [None]:
#Define Baseline ReLU activation function

def ReLU(x):
    """Baseline ReLU, delegating to Keras' built-in ``relu`` activation."""
    rectified = tf.keras.activations.relu(x)
    return rectified


# Register the function in Keras' custom-object table under its own name.
get_custom_objects().update({'ReLU': ReLU})


In [None]:
#Define Baseline Mish activation function
def Mish(x):
    """Baseline Mish activation: ``x * tanh(softplus(x))``."""
    gate = tf.keras.activations.tanh(tf.keras.activations.softplus(x))
    return x * gate


# Register the function in Keras' custom-object table under its own name.
get_custom_objects().update({'Mish': Mish})

In [None]:
#Define Baseline Elu activation function
def Elu(x, a=1):
    """Baseline ELU; ``a`` is forwarded as the ``alpha`` argument of Keras' ``elu``."""
    activated = tf.keras.activations.elu(x, alpha=a)
    return activated


# Register the function in Keras' custom-object table under its own name.
get_custom_objects().update({'Elu': Elu})


In [None]:
#Define  Baseline LeakyReLU activation function
def LeakyReLU(x):
    """Baseline Leaky ReLU with a fixed negative slope of 0.01.

    Uses the stateless ``tf.nn.leaky_relu`` op instead of constructing a new
    ``tf.keras.layers.LeakyReLU`` layer object on every call. This keeps the
    function a plain activation, like the other baselines in this notebook,
    and avoids the layer's ``alpha=`` constructor keyword, which newer Keras
    releases deprecate.
    """
    return tf.nn.leaky_relu(x, alpha=0.01)
get_custom_objects()['LeakyReLU'] = LeakyReLU


In [None]:
#Define Baseline Selu activation function
def Selu(x):
    """Baseline SELU, delegating to Keras' built-in ``selu`` activation."""
    scaled = tf.keras.activations.selu(x)
    return scaled


# Register the function in Keras' custom-object table under its own name.
get_custom_objects().update({'Selu': Selu})


In [None]:
# ============================================================
# Reproducibility setup
# Random seeds 
# ============================================================
def set_seed(seed=42):
    """Seed the random number generators used by the experiments.

    Exports ``PYTHONHASHSEED`` and seeds Python's ``random`` module, NumPy's
    global generator and TensorFlow, in that order, all with the same value.

    Args:
        seed: Value passed to every seeding call.
    """
    # NOTE(review): the interpreter reads PYTHONHASHSEED at start-up, so this
    # presumably only affects child processes -- confirm if hash ordering matters.
    os.environ.update(PYTHONHASHSEED=str(seed))
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)



# ============================================================
# Multi-Layer Perceptron (MLP) model for Fashion-MNIST
# ============================================================

In [None]:
def build_fashion_model_MLP(activation, optimizer_name='adam', learning_rate=0.001,
                            input_shape=(28, 28, 1)):
    """Build and compile the MLP classifier used for Fashion-MNIST.

    The network flattens the input and applies three hidden stages
    (398, 128 and 64 units), each made of Dense -> Activation ->
    BatchNormalization -> Dropout(0.1), followed by a 10-way softmax layer.

    Args:
        activation: Activation (callable or Keras identifier) used in every
            hidden stage.
        optimizer_name: ``'adam'`` or ``'nadam'`` (case-insensitive).
        learning_rate: Learning rate passed to the optimizer.
        input_shape: Shape of one sample. Defaults to ``(28, 28, 1)`` so the
            model accepts the arrays produced by the Fashion-MNIST
            preprocessing cell, which reshapes images to ``(-1, 28, 28, 1)``.
            Pass ``(28, 28)`` for images without a channel axis.

    Returns:
        A compiled ``keras.Sequential`` model using categorical cross-entropy
        loss and the accuracy metric.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported optimizer.
    """
    # Choose the optimizer first, so an unsupported name fails before any
    # layers are created.
    optimizer_name = optimizer_name.lower()
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'nadam': tf.keras.optimizers.Nadam,
    }
    if optimizer_name not in optimizer_classes:
        raise ValueError(f"Unknown optimizer: {optimizer_name}")
    optimizer = optimizer_classes[optimizer_name](learning_rate=learning_rate)

    # Sequential model
    model = keras.Sequential()
    model.add(layers.Input(shape=input_shape))
    model.add(layers.Flatten())

    # Hidden stages share the same structure and differ only in width.
    for units in (398, 128, 64):
        model.add(layers.Dense(units=units))
        model.add(layers.Activation(activation))
        model.add(BatchNormalization())
        model.add(layers.Dropout(rate=0.1))

    # Output layer
    model.add(layers.Dense(units=10, activation='softmax'))

    # Compile the model
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

# ============================================================
# Convolutional Neural Network (CNN) model for MNIST
# ============================================================

In [None]:



def build_mnist_model_cnn3(activation, optimizer_name='adam', learning_rate=0.001):
    """Build and compile the three-stage CNN used for MNIST.

    Each convolutional stage is Conv2D(3x3, 'same') -> Activation ->
    BatchNormalization -> MaxPooling2D(2x2, stride 2) -> Dropout(0.25), with
    32, 64 and 128 filters respectively. The head is Flatten -> Dense(512) ->
    Activation -> BatchNormalization -> Dropout(0.5) -> Dense(10, softmax).

    Args:
        activation: Activation (callable or Keras identifier) used after
            every convolution and after the 512-unit dense layer.
        optimizer_name: ``'adam'``, ``'rmsprop'`` or ``'nadam'``
            (case-insensitive).
        learning_rate: Learning rate passed to the optimizer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and the accuracy metric.

    Raises:
        ValueError: If ``optimizer_name`` is not a supported optimizer.
    """
    net = Sequential()

    # Input layer
    net.add(Input(shape=(28, 28, 1)))

    # Convolutional stages: identical structure, growing filter count.
    for n_filters in (32, 64, 128):
        net.add(layers.Conv2D(n_filters, (3, 3), padding='same'))
        net.add(layers.Activation(activation))
        net.add(BatchNormalization())
        net.add(layers.MaxPooling2D((2, 2), strides=2))
        net.add(Dropout(0.25))

    # Classifier head
    net.add(Flatten())
    net.add(layers.Dense(512))
    net.add(layers.Activation(activation))
    net.add(BatchNormalization())
    net.add(Dropout(0.5))
    net.add(layers.Dense(10, activation='softmax'))

    # Choose the optimizer through a name -> class lookup table.
    optimizer_name = optimizer_name.lower()
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'rmsprop': tf.keras.optimizers.RMSprop,
        'nadam': tf.keras.optimizers.Nadam,
    }
    if optimizer_name not in optimizer_classes:
        raise ValueError(f"Unknown optimizer: {optimizer_name}")
    optimizer = optimizer_classes[optimizer_name](learning_rate=learning_rate)

    # Compile the model
    net.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return net

# ============================================================
# ResNet-18 residual block
# ResNet-18 architecture adapted for the CIFAR-100 dataset
# ============================================================


In [None]:
def resnet_block(x, filters, stride=1):
    """Apply one basic residual block (two 3x3 convolutions plus a skip path).

    Main path: Conv2D(3x3, ``stride``) -> BatchNorm -> ReLU -> Conv2D(3x3) ->
    BatchNorm. The skip path is the unchanged input, unless ``stride`` is not
    1 or the input's channel count differs from ``filters``; in that case it
    is projected with a 1x1 convolution (same stride) and batch-normalised.
    The two paths are added and passed through a final ReLU.

    Args:
        x: Input tensor of the block.
        filters: Number of filters of every convolution in the block.
        stride: Stride of the first convolution and of the projection.

    Returns:
        The output tensor of the block.
    """
    identity = x

    # Main path
    out = Conv2D(filters, 3, strides=stride, padding='same', use_bias=False)(x)
    out = BatchNormalization()(out)
    out = Activation('relu')(out)

    out = Conv2D(filters, 3, strides=1, padding='same', use_bias=False)(out)
    out = BatchNormalization()(out)

    # Skip path: project only when the shapes of the two paths would differ.
    needs_projection = stride != 1 or identity.shape[-1] != filters
    if needs_projection:
        identity = Conv2D(filters, 1, strides=stride, padding='same', use_bias=False)(identity)
        identity = BatchNormalization()(identity)

    merged = Add()([out, identity])
    return Activation('relu')(merged)


In [None]:
def ResNet18_CIFAR(input_shape=(32,32,3), num_classes=100, activation_fc='relu'):
    """Assemble the ResNet-18 style network used for CIFAR-100.

    Stem: Conv2D(64, 3x3) -> BatchNorm -> ReLU. Body: four stages of two
    ``resnet_block`` calls each (64, 128, 256, 512 filters); the first block
    of stages 2-4 uses stride 2. Head: GlobalAveragePooling2D ->
    Dense(128, ``activation_fc``) -> Dense(``num_classes``, softmax).

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of units of the softmax output layer.
        activation_fc: Activation of the 128-unit dense layer. The
            convolutional part always uses ReLU.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Stem
    features = Conv2D(64, 3, padding='same', use_bias=False)(inputs)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)

    # (filters, stride of the first block) per stage; 4 stages x 2 blocks = 8 blocks
    stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
    for stage_filters, first_stride in stage_plan:
        features = resnet_block(features, stage_filters, stride=first_stride)
        features = resnet_block(features, stage_filters)

    # Classification head
    pooled = GlobalAveragePooling2D()(features)
    hidden = Dense(128, activation=activation_fc)(pooled)
    outputs = Dense(num_classes, activation='softmax', dtype='float32')(hidden)

    return Model(inputs, outputs)

In [None]:
def build_model(
    input_shape=(32,32,3),
    activation_fc='relu',
    learning_rate=0.001,
    num_classes=100):
    """Create the CIFAR ResNet-18 and compile it with Adam.

    Args:
        input_shape: Shape of one input image, forwarded to ``ResNet18_CIFAR``.
        activation_fc: Activation of the dense layer before the output,
            forwarded to ``ResNet18_CIFAR``.
        learning_rate: Learning rate of the Adam optimizer.
        num_classes: Number of output classes, forwarded to ``ResNet18_CIFAR``.

    Returns:
        The compiled model (categorical cross-entropy loss, accuracy metric).
    """
    network = ResNet18_CIFAR(
        input_shape=input_shape,
        num_classes=num_classes,
        activation_fc=activation_fc,
    )

    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return network



# ====================================
# Model training and evaluation
# - Multiple random seeds
# - Different learning rates
# - Mean and standard deviation are reported
# ====================================

In [None]:
# Draw the run seeds from a dedicated generator with a fixed seed, so the same
# three seeds are used on every Restart & Run All instead of depending on the
# state of Python's global random module.
_seed_rng = random.Random(42)
SEEDS = [_seed_rng.randint(0, 10000) for _ in range(3)]
all_results = []  # one summary dict per (activation, optimizer, lr) configuration

activation_functions = [MishRelU, ReLU, Mish, Elu, LeakyReLU, Selu]
learning_rates = [0.01,0.001,0.0001]
optimizers = ["nadam"]

# Integer class labels of the test set. They are identical for every run, so
# they are computed once instead of inside the innermost loop.
y_true = np.argmax(y_test_mnist, axis=1)

for activation in activation_functions:
    act_name = activation.__name__

    for optimizer_name in optimizers:
        for learning_rate in learning_rates:

            print(f"\n>>> Training with {act_name}, {optimizer_name}, lr={learning_rate}")
            print("Seeds for this run:", SEEDS)

            # Per-seed test metrics for this configuration
            accs = []
            precisions = []
            recalls = []
            f1s = []

            start = time.time()

            for seed in SEEDS:
                # Release the Keras state of the previously trained model so
                # memory does not accumulate over the many models built in
                # this cell. Done before seeding, so the seed is the last
                # thing set before the model is created.
                tf.keras.backend.clear_session()
                set_seed(seed)

                # Build the model
                model = build_mnist_model_cnn3(
                    activation,
                    optimizer_name=optimizer_name,
                    learning_rate=learning_rate
                )

                # Training (20% of the training data is held out for validation)
                history = model.fit(
                    X_train_mnist, y_train_mnist,
                    epochs=30,
                    validation_split=0.2,
                    batch_size=60,
                    verbose=0,
                )

                # Test accuracy
                test_loss, test_acc = model.evaluate(X_test_mnist, y_test_mnist, verbose=0)
                accs.append(test_acc)

                # Macro-averaged precision, recall and F1 on the test set
                y_pred_probs = model.predict(X_test_mnist, verbose=0)
                y_pred = np.argmax(y_pred_probs, axis=1)

                precisions.append(precision_score(y_true, y_pred, average='macro'))
                recalls.append(recall_score(y_true, y_pred, average='macro'))
                f1s.append(f1_score(y_true, y_pred, average='macro'))

            end = time.time()
            total_minutes = (end - start) / 60

            # Mean and standard deviation over the seeds.
            # np.std uses its default ddof=0 here (population standard deviation).
            mean_acc = np.mean(accs)
            std_acc = np.std(accs)

            mean_prec = np.mean(precisions)
            std_prec = np.std(precisions)

            mean_rec = np.mean(recalls)
            std_rec = np.std(recalls)

            mean_f1 = np.mean(f1s)
            std_f1 = np.std(f1s)

            print(f"{act_name}, lr={learning_rate} | "
                  f"Acc: {mean_acc:.4f}±{std_acc:.4f} | "
                  f"Prec: {mean_prec:.4f}±{std_prec:.4f} | "
                  f"Rec: {mean_rec:.4f}±{std_rec:.4f} | "
                  f"F1: {mean_f1:.4f}±{std_f1:.4f} "
                  f"({len(SEEDS)} seeds) | Time: {total_minutes:.2f} min")

            # Keep both the aggregates and the raw per-seed values; the raw
            # lists are what the statistical tests further down consume.
            all_results.append({
                "activation": act_name,
                "optimizer": optimizer_name,
                "lr": learning_rate,
                "mean_acc": mean_acc,
                "std_acc": std_acc,
                "mean_prec": mean_prec,
                "std_prec": std_prec,
                "mean_rec": mean_rec,
                "std_rec": std_rec,
                "mean_f1": mean_f1,
                "std_f1": std_f1,
                "all_accs": accs,
                "all_precs": precisions,
                "all_recs": recalls,
                "all_f1s": f1s
            })


# ============================================================
# Aggregate results across runs and convert to DataFrame
# ============================================================

# One row per (activation, optimizer, lr) configuration.
df_results = pd.DataFrame(all_results)

# Columns shown in the summary: identifiers first, then mean/std per metric.
summary_columns = ["activation", "lr"] + [
    f"{stat}_{metric}"
    for metric in ("acc", "prec", "rec", "f1")
    for stat in ("mean", "std")
]

print("\n=== Summary Results ===")
print(df_results[summary_columns])



# ============================================================
# Statistical significance analysis
# Friedman test followed by post-hoc Wilcoxon test
# ============================================================

In [None]:

activations = ["MishRelU", "ReLU", "Mish", "Elu", "LeakyReLU", "Selu"]
learning_rates = [0.01, 0.001, 0.0001]

metrics = ["all_accs", "all_precs", "all_recs", "all_f1s"]

# For every learning rate and metric, compare the per-seed scores of all
# activation functions with a Friedman test (one sample per activation).
# NOTE(review): each sample only holds one value per seed; with so few
# measurements the chi-square based p-value may be unreliable -- confirm
# against the SciPy documentation before drawing conclusions.
for lr in learning_rates:
    print(f"\n=== Friedman Test for lr = {lr} ===")
    at_this_lr = df_results["lr"] == lr
    for metric in metrics:
        # Per-seed score list of each activation at this learning rate
        values = [
            df_results.loc[at_this_lr & (df_results["activation"] == act), metric].values[0]
            for act in activations
        ]
        stat, p = friedmanchisquare(*values)
        metric_label = metric.replace('all_', '').capitalize()
        print(f"{metric_label} -> Friedman statistic = {stat:.4f}, p-value = {p:.6f}")


#========== Post-hoc Wilcoxon Test ============= 

In [None]:
activations = ["MishRelU", "ReLU", "Mish", "Elu", "LeakyReLU", "Selu"]
metrics = ["all_accs", "all_precs", "all_recs", "all_f1s"]
lr = 0.0001  # learning rate whose results are compared in the post-hoc analysis

def get_values(act, metric):
    """Return the per-seed score list of ``act`` for ``metric``.

    Reads the module-level ``lr`` at call time, so the row that is selected
    from ``df_results`` depends on the current value of that variable.
    """
    return df_results.query(f"activation=='{act}' and lr=={lr}")[metric].values[0]

for metric in metrics:
    print("\n===================================")
    print("Post-hoc Wilcoxon Test | Metric:", metric.replace("all_","").upper())
    print("===================================\n")

    # Collect the performance values for each activation function
    data = {act: get_values(act, metric) for act in activations}

    # Compute the mean performance to rank activation functions
    avg_scores = {act: np.mean(vals) for act, vals in data.items()}

    print("Average Scores:")
    for act, score in avg_scores.items():
        print(f"{act}: {score:.4f}")

    # The best function (highest mean score)
    best_act = max(avg_scores, key=avg_scores.get)
    print(f"\n==> BEST activation (based on mean): {best_act}\n")

    # Wilcoxon signed-rank test between each pair of activations.
    # NOTE(review): every sample holds one value per seed. With three seeds a
    # two-sided exact test cannot return p below 0.25, and the p-values are
    # not corrected for multiple comparisons -- interpret with care.
    print("Wilcoxon Pairwise p-values:")
    for i in range(len(activations)):
        for j in range(i+1, len(activations)):
            act1 = activations[i]
            act2 = activations[j]

            # With its default zero handling, wilcoxon() discards zero
            # differences. If both activations scored identically on every
            # seed nothing is left to rank, and SciPy either raises or returns
            # NaN depending on the version. Report that case explicitly.
            differences = np.asarray(data[act1], dtype=float) - np.asarray(data[act2], dtype=float)
            if not np.any(differences):
                print(f"{act1} vs {act2}: identical scores on every seed, Wilcoxon test not applicable")
                continue

            stat, p = wilcoxon(data[act1], data[act2])
            print(f"{act1} vs {act2}: p = {p:.6f}")