CNN + LSTM

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Mount Google Drive if dataset is stored there
from google.colab import drive
# drive.mount('/content/drive')

# Load Dataset (Update path if using Google Drive)
file_path = "cryptography_dataset_enhanced.csv"
df = pd.read_csv(file_path)

# Handle missing values.
# Rows without a "Key" are dropped as before. "Ciphertext" is checked as well
# because it is the column that gets sliced character by character further
# down; a NaN (float) there would raise a TypeError instead of being skipped.
df = df.dropna(subset=["Key", "Ciphertext"]).reset_index(drop=True)

# Encode Algorithm labels as integer class ids (0 .. n_classes - 1)
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

# Convert Ciphertext to fixed-length ASCII representation
def text_to_ascii(text, max_length=128):
    """Encode a string as a fixed-length vector of character code points.

    Args:
        text: Input string; only its first ``max_length`` characters are used.
        max_length: Length of the returned vector.

    Returns:
        1-D numpy array holding ``ord()`` of each kept character, right-padded
        with zeros up to ``max_length``.
    """
    clipped = text[:max_length]
    # A non-positive repeat count yields an empty list, so no padding is added
    # when the text already fills the vector.
    padding = [0] * (max_length - len(clipped))
    return np.array(list(map(ord, clipped)) + padding)

ciphertext_numeric = np.array([text_to_ascii(text, 128) for text in df["Ciphertext"]])
y = df["Algorithm_Label"].values

# Train-test split (done on row indices BEFORE scaling).
# The partition depends only on the number of rows and random_state, so it is
# the same 80/20 split that splitting the arrays directly would give.
train_idx, test_idx = train_test_split(np.arange(len(y)), test_size=0.2, random_state=42)

# Standardize the data.
# The scaler is fitted on training rows only, so the per-position mean and
# variance carry no information about the held-out rows (no data leakage).
scaler = StandardScaler()
scaler.fit(ciphertext_numeric[train_idx])
ciphertext_scaled = scaler.transform(ciphertext_numeric)

# Reshape for CNN-LSTM input: (samples, 128 time steps, 1 feature)
X = ciphertext_scaled.reshape(ciphertext_scaled.shape[0], 128, 1)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Define CNN + LSTM Model.
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` keyword on Conv1D; recent Keras versions emit a UserWarning
# for the keyword form in Sequential models. The resulting network is the same.
model = Sequential([
    tf.keras.Input(shape=(128, 1)),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    Dropout(0.3),
    LSTM(64, return_sequences=True),  # full sequence is passed to the next LSTM
    LSTM(32),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(len(label_encoder.classes_), activation='softmax')  # one output per encoded class
])

# Compile model (labels are integer ids, hence the sparse loss)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* numbers and the final score come from the same rows.
epochs = 20
batch_size = 32
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Final Test Accuracy: {test_acc * 100:.2f}%')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 170ms/step - accuracy: 0.2744 - loss: 1.7648 - val_accuracy: 0.2723 - val_loss: 1.6213
Epoch 2/20
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 159ms/step - accuracy: 0.2982 - loss: 1.5785 - val_accuracy: 0.2838 - val_loss: 1.6013
Epoch 3/20
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 158ms/step - accuracy: 0.2933 - loss: 1.5522 - val_accuracy: 0.2838 - val_loss: 1.5964
Epoch 4/20
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 140ms/step - accuracy: 0.3091 - loss: 1.5389 - val_accuracy: 0.2540 - val_loss: 1.5961
Epoch 5/20
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 155ms/step - accuracy: 0.3172 - loss: 1.5318 - val_accuracy: 0.2540 - val_loss: 1.5894
Epoch 6/20
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 160ms/step - accuracy: 0.2884 - loss: 1.5396 - val_accuracy: 0.2838 - val_loss: 1.5861
Epoch 7/20
[1m55/55[0m

CNN + LSTM (V2)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from collections import Counter

# Load the dataset and discard rows that have no ciphertext, renumbering the
# remaining rows from zero.
file_path = "cryptography_dataset_enhanced.csv"
df = (
    pd.read_csv(file_path)
    .dropna(subset=["Ciphertext"])
    .reset_index(drop=True)
)

# Map each algorithm name to an integer class id
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

# Convert Ciphertext to N-gram frequency representation
def ngram_frequencies(text, n=3, max_length=128):
    """Summarise a string as a fixed-length vector of n-gram repeat counts.

    The text is truncated to ``max_length`` characters and its overlapping
    character n-grams are counted. Slot ``i`` of the result holds the count of
    the ``i``-th *distinct* n-gram in order of first appearance, so a given slot
    does not refer to the same n-gram in different texts.

    Args:
        text: Input string.
        n: Size of each character n-gram.
        max_length: Truncation length and size of the returned vector.

    Returns:
        1-D float numpy array of length ``max_length``; unused slots are zero.
    """
    clipped = text[:max_length]
    grams = (clipped[start:start + n] for start in range(len(clipped) - n + 1))
    # Counter keeps insertion order, i.e. order of first occurrence.
    counts = list(Counter(grams).values())[:max_length]

    vector = np.zeros(max_length)
    vector[:len(counts)] = counts
    return vector

ciphertext_features = np.array([ngram_frequencies(text, n=3) for text in df["Ciphertext"]])
y = df["Algorithm_Label"].values

# Train-test split on row indices, performed before any statistics are learned.
# Same row count + same random_state gives the same 80/20 partition as
# splitting the arrays themselves.
train_idx, test_idx = train_test_split(np.arange(len(y)), test_size=0.2, random_state=42)

# Standardize the data using statistics from the training rows only, so the
# held-out rows do not influence the preprocessing (avoids data leakage).
scaler = StandardScaler()
scaler.fit(ciphertext_features[train_idx])
ciphertext_scaled = scaler.transform(ciphertext_features)

# Reshape for CNN input: (samples, 128 steps, 1 feature)
X = ciphertext_scaled.reshape(ciphertext_scaled.shape[0], 128, 1)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Define CNN + BiLSTM Model.
# An explicit Input object replaces `input_shape=` on Conv1D, which recent
# Keras versions warn about in Sequential models; the network is unchanged.
model = Sequential([
    tf.keras.Input(shape=(128, 1)),
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    Dropout(0.3),
    Bidirectional(LSTM(64, return_sequences=True)),  # sequence output feeds the second BiLSTM
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(len(label_encoder.classes_), activation='softmax')  # one output per encoded class
])

# Compile model (integer labels -> sparse categorical loss)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train model.
# NOTE(review): validation_data is the test split, so validation and final
# evaluation use the same rows.
epochs = 50
batch_size = 32
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

# Evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Final Test Accuracy: {test_acc * 100:.2f}%')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 318ms/step - accuracy: 0.2721 - loss: 1.8880 - val_accuracy: 0.4240 - val_loss: 1.2506
Epoch 2/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 324ms/step - accuracy: 0.3989 - loss: 1.2311 - val_accuracy: 0.4340 - val_loss: 1.1706
Epoch 3/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 289ms/step - accuracy: 0.4430 - loss: 1.1716 - val_accuracy: 0.5940 - val_loss: 0.9287
Epoch 4/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 307ms/step - accuracy: 0.5152 - loss: 0.9780 - val_accuracy: 0.5920 - val_loss: 0.7665
Epoch 5/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 296ms/step - accuracy: 0.5161 - loss: 0.8102 - val_accuracy: 0.5620 - val_loss: 0.7441
Epoch 6/50
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 288ms/step - accuracy: 0.5287 - loss: 0.7782 - val_accuracy: 0.5620 - val_loss: 0.7395
Epoch 7/50
[1m63/63[

CNN, LSTM, CNN+LSTM

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Dropout, Flatten
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load Dataset
file_path = "cryptography_dataset_enhanced.csv"
df = pd.read_csv(file_path)

# Handle missing values.
# "Ciphertext" is filtered together with "Key": it is the column encoded
# below, and a NaN entry would raise a TypeError when sliced as a string.
df = df.dropna(subset=["Key", "Ciphertext"]).reset_index(drop=True)

# Encode Algorithm labels as integer class ids
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

# Convert Ciphertext to fixed-length ASCII representation
def text_to_ascii(text, max_length=128):
    """Turn a string into a zero-padded vector of ``ord()`` values.

    Args:
        text: String to encode; characters beyond ``max_length`` are ignored.
        max_length: Size of the output vector.

    Returns:
        1-D numpy array of character codes, padded with trailing zeros when the
        text is shorter than ``max_length``.
    """
    codes = []
    for ch in text[:max_length]:
        codes.append(ord(ch))
    # Multiplying a list by a non-positive number gives [], so nothing is
    # appended when the vector is already full.
    codes += [0] * (max_length - len(codes))
    return np.array(codes)

ciphertext_numeric = np.array([text_to_ascii(text, 128) for text in df["Ciphertext"]])

# Standardize every character position to zero mean / unit variance.
# NOTE(review): the scaler sees all rows here, including the ones that the
# training loop later holds out for evaluation.
scaler = StandardScaler()
ciphertext_scaled = scaler.fit_transform(ciphertext_numeric)

# Add a trailing feature axis so the data is (samples, 128, 1) for CNN/LSTM layers
X = ciphertext_scaled.reshape(ciphertext_scaled.shape[0], 128, 1)
y = df["Algorithm_Label"].values

# Cut the rows into five equal, contiguous parts; any remainder rows beyond
# 5 * split_size are left out.
split_size = len(df) // 5
X_splits = np.split(X[:5 * split_size], 5)
y_splits = np.split(y[:5 * split_size], 5)

# Define different models
def build_cnn_lstm():
    """Build the uncompiled Conv1D + stacked-LSTM classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on Conv1D: same network,
    # without the Keras UserWarning about passing input_shape to a layer.
    model = Sequential([
        tf.keras.Input(shape=(128, 1)),
        Conv1D(64, kernel_size=3, activation='relu'),
        Dropout(0.3),
        LSTM(64, return_sequences=True),  # sequence output feeds the next LSTM
        LSTM(32),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

def build_lstm():
    """Build the uncompiled two-layer LSTM classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on the first LSTM, which
    # recent Keras versions warn about in Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(128, 1)),
        LSTM(64, return_sequences=True),  # sequence output feeds the next LSTM
        LSTM(32),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

def build_cnn():
    """Build the uncompiled single-convolution classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on Conv1D, which recent
    # Keras versions warn about in Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(128, 1)),
        Conv1D(64, kernel_size=3, activation='relu'),
        Flatten(),  # collapse (steps, filters) into one vector for the dense head
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

def build_mlp():
    """Build the uncompiled fully-connected classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on Flatten, which recent
    # Keras versions warn about in Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(128, 1)),
        Flatten(),  # (128, 1) -> 128 features
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

def build_gru():
    """Build the uncompiled two-layer GRU classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on the first GRU, which
    # recent Keras versions warn about in Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(128, 1)),
        GRU(64, return_sequences=True),  # sequence output feeds the next GRU
        GRU(32),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

# One freshly built model per architecture. Dictionary order matters: the
# i-th entry is trained on the i-th data part below.
models = {
    "CNN-LSTM": build_cnn_lstm(),
    "LSTM": build_lstm(),
    "CNN": build_cnn(),
    "MLP": build_mlp(),
    "GRU": build_gru()
}

# Train and evaluate each model
epochs = 20
batch_size = 32
results = {}

# NOTE(review): every model gets a different fifth of the data (and therefore
# a different test set), so the accuracies below are not measured on the same
# rows.
for i, ((model_name, model), X_part, y_part) in enumerate(zip(models.items(), X_splits, y_splits)):
    print(f"\nTraining Model {model_name} on Part {i+1} of dataset")
    X_train, X_test, y_train, y_test = train_test_split(X_part, y_part, test_size=0.2, random_state=42)

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

    test_loss, test_acc = model.evaluate(X_test, y_test)
    results[model_name] = test_acc
    print(f'Final Test Accuracy for {model_name}: {test_acc * 100:.2f}%')

# Report the entry with the highest recorded accuracy
best_model = max(results, key=lambda name: results[name])
print(f'\nBest performing model: {best_model} with accuracy {results[best_model] * 100:.2f}%')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)



Training Model CNN-LSTM on Part 1 of dataset
Epoch 1/20


  super().__init__(**kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 134ms/step - accuracy: 0.2713 - loss: 1.7839 - val_accuracy: 0.3066 - val_loss: 1.5547
Epoch 2/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 134ms/step - accuracy: 0.2612 - loss: 1.6255 - val_accuracy: 0.2930 - val_loss: 1.5370
Epoch 3/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 136ms/step - accuracy: 0.2801 - loss: 1.5886 - val_accuracy: 0.3047 - val_loss: 1.5338
Epoch 4/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 135ms/step - accuracy: 0.2752 - loss: 1.5809 - val_accuracy: 0.2930 - val_loss: 1.5233
Epoch 5/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 116ms/step - accuracy: 0.2817 - loss: 1.5885 - val_accuracy: 0.2812 - val_loss: 1.5190
Epoch 6/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 132ms/step - accuracy: 0.2831 - loss: 1.5701 - val_accuracy: 0.2930 - val_loss: 1.5170
Epoch 7/20
[1m64/64[0m [32m━━━━

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, GRU, Dense, Dropout, Flatten, BatchNormalization, MultiHeadAttention, Input
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load Dataset
file_path = "cryptography_dataset_enhanced.csv"
df = pd.read_csv(file_path)

# Handle missing values.
# "Ciphertext" is filtered together with "Key": it is the column encoded
# below, and a NaN entry would raise a TypeError when sliced as a string.
df = df.dropna(subset=["Key", "Ciphertext"]).reset_index(drop=True)

# Encode Algorithm labels as integer class ids
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

# Convert Ciphertext to fixed-length ASCII representation
def text_to_ascii(text, max_length=128):
    """Convert text to a fixed-size array of character code points.

    Args:
        text: String to encode; it is cut to ``max_length`` characters.
        max_length: Number of entries in the result.

    Returns:
        1-D numpy array with ``ord()`` of each kept character followed by
        zeros up to ``max_length``.
    """
    clipped = text[:max_length]
    codes = [0] * max_length
    # Overwrite the leading slots with the real character codes; the untouched
    # tail stays zero and acts as padding.
    codes[:len(clipped)] = [ord(ch) for ch in clipped]
    return np.array(codes)

ciphertext_numeric = np.array([text_to_ascii(text, 128) for text in df["Ciphertext"]])

# Per-position standardisation (zero mean, unit variance).
# NOTE(review): statistics are computed over all rows, including rows that the
# training loop later uses for evaluation.
scaler = StandardScaler()
ciphertext_scaled = scaler.fit_transform(ciphertext_numeric)

# Shape the data as (samples, 128, 1) for the sequence models
n_samples = ciphertext_scaled.shape[0]
X = ciphertext_scaled.reshape(n_samples, 128, 1)
y = df["Algorithm_Label"].values

# Five equal, contiguous row ranges; rows past 5 * split_size are not used
split_size = len(df) // 5
part_bounds = [(k * split_size, (k + 1) * split_size) for k in range(5)]
X_splits = [X[start:stop] for start, stop in part_bounds]
y_splits = [y[start:stop] for start, stop in part_bounds]

# Define improved models with attention and batch normalization
def build_cnn_lstm():
    """Build the uncompiled Conv1D + LSTM + self-attention classifier.

    The output width comes from the module-level ``label_encoder``.

    Returns:
        A functional ``tf.keras.Model`` for inputs of shape (128, 1).
    """
    n_classes = len(label_encoder.classes_)

    inputs = Input(shape=(128, 1))
    conv = Conv1D(128, kernel_size=5, activation='relu')(inputs)
    conv = BatchNormalization()(conv)
    conv = Dropout(0.3)(conv)
    sequence = LSTM(128, return_sequences=True)(conv)
    # Query and value are the same tensor, i.e. self-attention over the sequence
    attended = MultiHeadAttention(num_heads=2, key_dim=64)(sequence, sequence)
    summary = LSTM(64)(attended)
    hidden = Dense(64, activation='relu')(summary)
    hidden = Dropout(0.3)(hidden)
    outputs = Dense(n_classes, activation='softmax')(hidden)
    return tf.keras.Model(inputs, outputs)

def build_lstm():
    """Build the uncompiled LSTM + self-attention classifier.

    The output width comes from the module-level ``label_encoder``.

    Returns:
        A functional ``tf.keras.Model`` for inputs of shape (128, 1).
    """
    n_classes = len(label_encoder.classes_)

    inputs = Input(shape=(128, 1))
    sequence = LSTM(128, return_sequences=True)(inputs)
    # Query and value are the same tensor, i.e. self-attention over the sequence
    attended = MultiHeadAttention(num_heads=2, key_dim=64)(sequence, sequence)
    summary = LSTM(64)(attended)
    hidden = Dense(64, activation='relu')(summary)
    hidden = Dropout(0.3)(hidden)
    outputs = Dense(n_classes, activation='softmax')(hidden)
    return tf.keras.Model(inputs, outputs)

def build_cnn():
    """Build the uncompiled two-convolution classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on Conv1D, which recent
    # Keras versions warn about in Sequential models.
    model = Sequential([
        Input(shape=(128, 1)),
        Conv1D(128, kernel_size=5, activation='relu'),
        BatchNormalization(),
        Conv1D(64, kernel_size=3, activation='relu'),
        Flatten(),  # collapse (steps, filters) into one vector for the dense head
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

def build_mlp():
    """Build the uncompiled fully-connected classifier with batch normalisation.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on Flatten, which recent
    # Keras versions warn about in Sequential models.
    model = Sequential([
        Input(shape=(128, 1)),
        Flatten(),  # (128, 1) -> 128 features
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

def build_gru():
    """Build the uncompiled GRU + self-attention classifier.

    The output width comes from the module-level ``label_encoder``.

    Returns:
        A functional ``tf.keras.Model`` for inputs of shape (128, 1).
    """
    n_classes = len(label_encoder.classes_)

    inputs = Input(shape=(128, 1))
    sequence = GRU(128, return_sequences=True)(inputs)
    # Query and value are the same tensor, i.e. self-attention over the sequence
    attended = MultiHeadAttention(num_heads=2, key_dim=64)(sequence, sequence)
    summary = GRU(64)(attended)
    hidden = Dense(64, activation='relu')(summary)
    hidden = Dropout(0.3)(hidden)
    outputs = Dense(n_classes, activation='softmax')(hidden)
    return tf.keras.Model(inputs, outputs)

# One freshly built model per architecture. Dictionary order matters: the
# i-th entry is trained on the i-th data part below.
models = {
    "CNN-LSTM": build_cnn_lstm(),
    "LSTM": build_lstm(),
    "CNN": build_cnn(),
    "MLP": build_mlp(),
    "GRU": build_gru()
}

# Train and evaluate each model with improved settings
epochs = 30
batch_size = 64
results = {}

# NOTE(review): every model gets a different fifth of the data (and therefore
# a different test set), so the accuracies below are not measured on the same
# rows.
for i, ((model_name, model), X_part, y_part) in enumerate(zip(models.items(), X_splits, y_splits)):
    print(f"\nTraining Model {model_name} on Part {i+1} of dataset")
    X_train, X_test, y_train, y_test = train_test_split(X_part, y_part, test_size=0.2, random_state=42)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

    test_loss, test_acc = model.evaluate(X_test, y_test)
    results[model_name] = test_acc
    print(f'Final Test Accuracy for {model_name}: {test_acc * 100:.2f}%')

# Report the entry with the highest recorded accuracy
best_model = max(results, key=lambda name: results[name])
print(f'\nBest performing model: {best_model} with accuracy {results[best_model] * 100:.2f}%')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)



Training Model CNN-LSTM on Part 1 of dataset
Epoch 1/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 713ms/step - accuracy: 0.2927 - loss: 1.7184 - val_accuracy: 0.2916 - val_loss: 1.8183
Epoch 2/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 660ms/step - accuracy: 0.4944 - loss: 1.0424 - val_accuracy: 0.1735 - val_loss: 3.8069
Epoch 3/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 660ms/step - accuracy: 0.5389 - loss: 0.8920 - val_accuracy: 0.4481 - val_loss: 2.0388
Epoch 4/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 640ms/step - accuracy: 0.5579 - loss: 0.7734 - val_accuracy: 0.4381 - val_loss: 2.3279
Epoch 5/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 655ms/step - accuracy: 0.5725 - loss: 0.7413 - val_accuracy: 0.4438 - val_loss: 2.4182
Epoch 6/30
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 717ms/step - accuracy: 0.5641 - loss: 0.7395 - val_accuracy: 0.

 Complement Naive Bayes

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import accuracy_score

# Load Dataset
file_path = "cryptography_dataset_enhanced.csv"
df = pd.read_csv(file_path)

# Handle missing values.
# "Ciphertext" is filtered together with "Key": it is the column encoded
# below, and a NaN entry would raise a TypeError when sliced as a string.
df = df.dropna(subset=["Key", "Ciphertext"]).reset_index(drop=True)

# Encode Algorithm labels as integer class ids
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

# Convert Ciphertext to fixed-length ASCII representation
def text_to_ascii(text, max_length=128):
    """Encode a string as ``max_length`` character codes, zero-padded.

    Args:
        text: String to encode; only the first ``max_length`` characters count.
        max_length: Length of the returned vector.

    Returns:
        1-D numpy array of ``ord()`` values with trailing zeros as padding.
    """
    head = text[:max_length]
    missing = max_length - len(head)
    # `missing` is never positive when the text fills the vector, in which case
    # the multiplication below contributes nothing.
    return np.array([ord(ch) for ch in head] + [0] * missing)

ciphertext_numeric = np.array([text_to_ascii(text, 128) for text in df["Ciphertext"]])
y = df["Algorithm_Label"].values

# Train-test split on row indices, done before any scaler is fitted.
# Same row count + same random_state gives the same 80/20 partition as
# splitting the arrays themselves.
train_idx, test_idx = train_test_split(np.arange(len(y)), test_size=0.2, random_state=42)

# Standardize the data with statistics from the training rows only, so the
# held-out rows do not leak into the preprocessing.
scaler = StandardScaler()
scaler.fit(ciphertext_numeric[train_idx])
ciphertext_scaled = scaler.transform(ciphertext_numeric)

# Prepare input and labels
X = ciphertext_scaled
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Create a separate dataset for ComplementNB (ensuring non-negative values).
# clip=True keeps transformed test values inside [0, 1]; without it a test
# value below the training minimum becomes negative, which ComplementNB rejects.
minmax_scaler = MinMaxScaler(clip=True)
X_train_nb = minmax_scaler.fit_transform(X_train)
X_test_nb = minmax_scaler.transform(X_test)

# Define classifiers.
# NOTE(review): the "J48", "FT" and "PART" labels are attached to scikit-learn
# DecisionTree / ExtraTrees estimators here — confirm the naming is intended.
# The tree-based models are seeded with the same random_state used for the
# train/test split so repeated runs give the same scores.
models = {
    "J48 (Decision Tree)": DecisionTreeClassifier(random_state=42),
    "FT (Functional Trees)": ExtraTreesClassifier(n_estimators=100, random_state=42),
    "PART (Rule-based Classifier)": DecisionTreeClassifier(splitter='random', random_state=42),
    "Complement Naive Bayes": ComplementNB(),
    # Explicit Input layer instead of `input_shape=` on Flatten, which recent
    # Keras versions warn about in Sequential models.
    "Multilayer Perceptron": Sequential([
        tf.keras.Input(shape=(128,)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
}

# Fit every classifier and record its accuracy on the held-out rows
results = {}
for model_name, model in models.items():
    print(f"\nTraining {model_name}...")

    if model_name == "Multilayer Perceptron":
        # Keras model: compile, train for 20 epochs, then evaluate
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test), verbose=1)
        test_loss, test_acc = model.evaluate(X_test, y_test)
        results[model_name] = test_acc
    else:
        # scikit-learn estimators share one code path; only ComplementNB is
        # given the min-max scaled (non-negative) features.
        needs_non_negative = model_name == "Complement Naive Bayes"
        fit_features = X_train_nb if needs_non_negative else X_train
        eval_features = X_test_nb if needs_non_negative else X_test

        model.fit(fit_features, y_train)
        y_pred = model.predict(eval_features)
        acc = accuracy_score(y_test, y_pred)
        results[model_name] = acc

    print(f'Final Test Accuracy for {model_name}: {results[model_name] * 100:.2f}%')

# Report the entry with the highest recorded accuracy
best_model = max(results, key=lambda name: results[name])
print(f'\nBest performing model: {best_model} with accuracy {results[best_model] * 100:.2f}%')


  super().__init__(**kwargs)



Training J48 (Decision Tree)...
Final Test Accuracy for J48 (Decision Tree): 57.35%

Training FT (Functional Trees)...
Final Test Accuracy for FT (Functional Trees): 57.78%

Training PART (Rule-based Classifier)...
Final Test Accuracy for PART (Rule-based Classifier): 57.27%

Training Complement Naive Bayes...
Final Test Accuracy for Complement Naive Bayes: 57.81%

Training Multilayer Perceptron...
Epoch 1/20
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.5149 - loss: 1.0119 - val_accuracy: 0.5764 - val_loss: 0.6671
Epoch 2/20
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5892 - loss: 0.6820 - val_accuracy: 0.5735 - val_loss: 0.6637
Epoch 3/20
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5872 - loss: 0.6778 - val_accuracy: 0.5670 - val_loss: 0.6637
Epoch 4/20
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5990 - loss

CNN-LSTM model

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Flatten, BatchNormalization, Embedding
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Load Dataset
file_path = "cryptography_dataset_enhanced.csv"
df = pd.read_csv(file_path)

# Handle missing values.
# "Ciphertext" is filtered together with "Key": it is the column vectorized
# below, and TfidfVectorizer raises on NaN documents.
df = df.dropna(subset=["Key", "Ciphertext"]).reset_index(drop=True)

# Encode Algorithm labels as integer class ids
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

y = df["Algorithm_Label"].values

# Train-test split on row indices, done before any vocabulary or statistics
# are learned. Same row count + same random_state gives the same 80/20
# partition as splitting the arrays themselves.
train_idx, test_idx = train_test_split(np.arange(len(y)), test_size=0.2, random_state=42)

# Convert Ciphertext using character-level TF-IDF instead of ASCII encoding.
# The default word analyzer splits on punctuation and lower-cases the text,
# which yields almost no shared tokens between ciphertexts (the recorded run
# with word tokens stays at chance accuracy). Character 1- and 2-grams with
# case preserved give every sample non-trivial features.
# NOTE(review): assumes ciphertexts are opaque encoded strings (e.g. Base64 or
# hex) rather than natural-language text — confirm against the dataset.
vectorizer = TfidfVectorizer(max_features=128, analyzer="char", ngram_range=(1, 2), lowercase=False)
vectorizer.fit(df["Ciphertext"].iloc[train_idx])  # vocabulary / IDF from training rows only
X_tfidf = vectorizer.transform(df["Ciphertext"]).toarray()

# The model input is fixed at 128 steps; fail with a clear message if the
# vocabulary turned out smaller than that instead of erroring inside reshape.
if X_tfidf.shape[1] != 128:
    raise ValueError(f"Expected 128 TF-IDF features, got {X_tfidf.shape[1]}")

# Standardize the data using training-row statistics only (no test leakage)
scaler = StandardScaler()
scaler.fit(X_tfidf[train_idx])
X_scaled = scaler.transform(X_tfidf)

# Reshape for CNN-LSTM input: (samples, 128, 1)
X = X_scaled.reshape(X_scaled.shape[0], 128, 1)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Define improved CNN-LSTM model
def build_cnn_lstm():
    """Build the uncompiled two-convolution + stacked-LSTM classifier.

    The softmax width is taken from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (128, 1).
    """
    # Explicit Input layer instead of `input_shape=` on Conv1D, which recent
    # Keras versions warn about in Sequential models; the network is unchanged.
    model = Sequential([
        tf.keras.Input(shape=(128, 1)),
        Conv1D(256, kernel_size=5, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Conv1D(128, kernel_size=3, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(128, return_sequences=True, recurrent_dropout=0.2),  # sequence output feeds the next LSTM
        LSTM(64, recurrent_dropout=0.2),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])
    return model

# Instantiate the network and attach optimizer, loss and metric.
# Adam runs with a small learning rate and gradient-norm clipping at 1.0.
model = build_cnn_lstm()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=1.0),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training configuration; the test split is also used as validation data
epochs = 50
batch_size = 64
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
)

# Final score on the held-out split
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Final Test Accuracy: {test_acc * 100:.2f}%')
..........

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 858ms/step - accuracy: 0.1413 - loss: 1.9467 - val_accuracy: 0.1474 - val_loss: 1.9455
Epoch 2/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m170s[0m 716ms/step - accuracy: 0.1412 - loss: 1.9462 - val_accuracy: 0.1511 - val_loss: 1.9456
Epoch 3/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 731ms/step - accuracy: 0.1448 - loss: 1.9463 - val_accuracy: 0.1445 - val_loss: 1.9453
Epoch 4/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 715ms/step - accuracy: 0.1466 - loss: 1.9462 - val_accuracy: 0.1405 - val_loss: 1.9453
Epoch 5/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 725ms/step - accuracy: 0.1479 - loss: 1.9457 - val_accuracy: 0.1528 - val_loss: 1.9448
Epoch 6/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 726ms/step - accuracy: 0.1494 - loss: 1.9458 - val_accuracy: 0.1528 - val_loss: 1.9449
Epoc

CNN-BiLSTM model

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, BatchNormalization, Bidirectional, Flatten, Input
from tensorflow.keras.regularizers import l2
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Load Dataset
file_path = "cryptography_dataset_enhanced.csv"
df = pd.read_csv(file_path)

# Handle missing values.
# "Ciphertext" is filtered together with "Key": it is the column vectorized
# below, and TfidfVectorizer raises on NaN documents.
df = df.dropna(subset=["Key", "Ciphertext"]).reset_index(drop=True)

# Encode Algorithm labels as integer class ids
label_encoder = LabelEncoder()
df["Algorithm_Label"] = label_encoder.fit_transform(df["Algorithm"])

y = df["Algorithm_Label"].values

# Train-test split on row indices, done before any vocabulary or statistics
# are learned. Same row count + same random_state gives the same 80/20
# partition as splitting the arrays themselves.
train_idx, test_idx = train_test_split(np.arange(len(y)), test_size=0.2, random_state=42)

# Convert Ciphertext using character-level TF-IDF with more features.
# The default word analyzer splits on punctuation and lower-cases the text,
# which yields almost no shared tokens between ciphertexts (the recorded run
# with word tokens stays at chance accuracy). Character 1- and 2-grams with
# case preserved give every sample non-trivial features.
# NOTE(review): assumes ciphertexts are opaque encoded strings (e.g. Base64 or
# hex) rather than natural-language text — confirm against the dataset.
vectorizer = TfidfVectorizer(max_features=256, analyzer="char", ngram_range=(1, 2), lowercase=False)
vectorizer.fit(df["Ciphertext"].iloc[train_idx])  # vocabulary / IDF from training rows only
X_tfidf = vectorizer.transform(df["Ciphertext"]).toarray()

# The model input is fixed at 256 steps; fail with a clear message if the
# vocabulary turned out smaller than that instead of erroring inside reshape.
if X_tfidf.shape[1] != 256:
    raise ValueError(f"Expected 256 TF-IDF features, got {X_tfidf.shape[1]}")

# Standardize the data using training-row statistics only (no test leakage)
scaler = StandardScaler()
scaler.fit(X_tfidf[train_idx])
X_scaled = scaler.transform(X_tfidf)

# Reshape for CNN-BiLSTM input: (samples, 256, 1)
X = X_scaled.reshape(X_scaled.shape[0], 256, 1)

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

# Define optimized CNN-BiLSTM model
def build_optimized_model():
    """Build the uncompiled CNN + bidirectional-LSTM classifier.

    Convolution and hidden dense kernels carry an L2 penalty of 0.001. The
    output width comes from the module-level ``label_encoder``.

    Returns:
        A ``Sequential`` model for inputs of shape (256, 1).
    """
    model = Sequential()
    model.add(Input(shape=(256, 1)))

    # Convolutional front end: two conv -> batch-norm -> dropout stages
    model.add(Conv1D(256, kernel_size=5, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Conv1D(128, kernel_size=3, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))

    # Recurrent part: the first BiLSTM keeps the sequence, the second does not
    model.add(Bidirectional(LSTM(128, return_sequences=True, recurrent_dropout=0.2)))
    model.add(Bidirectional(LSTM(64, recurrent_dropout=0.2)))

    # Classification head
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
    model.add(Dropout(0.3))
    model.add(Dense(len(label_encoder.classes_), activation='softmax'))
    return model

# Instantiate the network; RMSprop is used with gradient-norm clipping at 1.0
model = build_optimized_model()
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0005, clipnorm=1.0),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training configuration; the per-epoch metrics are kept in `history`.
# The test split is also used as validation data.
epochs = 50
batch_size = 64
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size,
)

# Final score on the held-out split
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Final Test Accuracy: {test_acc * 100:.2f}%')


Epoch 1/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m751s[0m 3s/step - accuracy: 0.1421 - loss: 2.1843 - val_accuracy: 0.1431 - val_loss: 2.0528
Epoch 2/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m742s[0m 3s/step - accuracy: 0.1444 - loss: 2.0301 - val_accuracy: 0.1496 - val_loss: 1.9795
Epoch 3/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m726s[0m 3s/step - accuracy: 0.1493 - loss: 1.9727 - val_accuracy: 0.1499 - val_loss: 1.9537
Epoch 4/50
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m685s[0m 3s/step - accuracy: 0.1451 - loss: 1.9553 - val_accuracy: 0.1607 - val_loss: 1.9469
Epoch 5/50
[1m212/220[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m23s[0m 3s/step - accuracy: 0.1521 - loss: 1.9485

Random-Forest-Classifier

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import base64

# Read the CSV and keep only the columns used for classification:
# "Plaintext" and "Key" are removed right after loading.
file_path = "cryptography_dataset_enhanced (1).csv"
df = pd.read_csv(file_path).drop(columns=["Plaintext", "Key"])

# Function to extract byte frequency histogram from Ciphertext
def extract_histogram(ciphertext):
    """Return a 256-bin byte-frequency histogram for one ciphertext string.

    The text is first decoded as strict Base64. If that fails, the bytes of the
    string itself (``str.encode()``) are counted instead.

    Args:
        ciphertext: Ciphertext as a string.

    Returns:
        Float numpy array of shape (256,); element ``b`` is the number of
        occurrences of byte value ``b``.
    """
    try:
        decoded_bytes = base64.b64decode(ciphertext, validate=True)  # Convert Base64 to bytes
    except ValueError:
        # binascii.Error (bad alphabet or padding) is a ValueError subclass, and
        # non-ASCII text raises a plain ValueError. Catching only these lets
        # KeyboardInterrupt and genuine programming errors propagate, which
        # the previous bare `except:` silently swallowed.
        decoded_bytes = ciphertext.encode()  # If not Base64, use direct encoding

    histogram = np.zeros(256)  # Initialize histogram with 256 bins (0-255 byte values)
    for byte in decoded_bytes:
        histogram[byte] += 1  # Iterating bytes yields ints in 0..255
    return histogram

# Build one 256-bin histogram per ciphertext and stack them into a 2-D array
histograms = np.array(list(map(extract_histogram, df["Ciphertext"])))

# Wrap the array in a DataFrame with one named column per byte value and
# attach the algorithm label of each row
feature_columns = ["Byte_" + str(i) for i in range(256)]
hist_df = pd.DataFrame(histograms, columns=feature_columns)
hist_df["Algorithm"] = df["Algorithm"]

# Features are the 256 byte-count columns; the target is the algorithm name
X = hist_df[feature_columns]
y = hist_df["Algorithm"]

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with 200 trees and a fixed seed
model = RandomForestClassifier(n_estimators=200, random_state=42)
print("\nTraining the improved model...")

# fit() returns the fitted estimator, so prediction can be chained onto it
y_pred = model.fit(X_train, y_train).predict(X_test)

# Overall accuracy followed by the per-class precision / recall / F1 table
accuracy = accuracy_score(y_test, y_pred)
print(f"\nImproved Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Training the improved model...

Improved Model Accuracy: 0.41

Classification Report:
              precision    recall  f1-score   support

        3DES       0.23      0.19      0.21        67
         AES       0.40      0.34      0.37        67
    Blowfish       0.12      0.09      0.10        66
    ChaCha20       0.16      0.29      0.21        48
         DES       0.11      0.08      0.09        52
         RC4       0.24      0.27      0.25        77
         RSA       1.00      1.00      1.00        68
     SHA-256       0.95      1.00      0.97        55

    accuracy                           0.41       500
   macro avg       0.40      0.41      0.40       500
weighted avg       0.40      0.41      0.40       500

