In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Input, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
f128 = pd.read_csv('f128_2_dataset.csv')
f256_2 = pd.read_csv('f256_2_dataset.csv')
f256_4 = pd.read_csv('f256_4_dataset.csv')
f512_4 = pd.read_csv('f512_4_dataset.csv')
f512_8 = pd.read_csv('f512_8_dataset.csv')
f1024 = pd.read_csv('f1024_8_dataset.csv')

print("Datasets loaded successfully.")

Datasets loaded successfully.


In [4]:
# Function to create model with different input shapes
def create_model(input_shape):
    model = Sequential()

    # Input layer
    model.add(Input(shape=input_shape))

    # Convolutional layers
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(MaxPooling1D())
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(MaxPooling1D())
    model.add(Dropout(0.2))

    # Fully connected layers
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', 'precision', 'recall'])

    return model

# Pre-create models for each dataset size
model_128 = create_model(input_shape=(128, 1))
model_256 = create_model(input_shape=(256, 1))
model_512 = create_model(input_shape=(512, 1))
model_1024 = create_model(input_shape=(1024, 1))
print("Models created successfully.")

Models created successfully.


In [5]:
# Functions to remove redudant code as encoded_url.ipynb
def convert_and_split (data, labels, test_size=0.3, val_size=1/3, random_state=42):
    # Map binary strings to integers
    X = np.array([list(map(int, x)) for x in data])
    y = labels

    # Split dataset
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=test_size, random_state=random_state)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=test_size, random_state=random_state)

    return X_train, y_train, X_test, y_test, X_val, y_val

def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, X_val, y_val, epochs=8, batch_size=128):


    # Train
    history = model.fit(
        X_train, 
        y_train, 
        epochs=epochs, 
        batch_size=batch_size, 
        validation_data=(X_val, y_val)
    )

    # Evaluate
    loss, accuracy, precision, recall = model.evaluate(X_test, y_test)

    # Print results
    print(f"Accuracy: {round(accuracy, 4)}")
    print(f"Precision: {round(precision, 4)}")
    print(f"Recall: {round(recall, 4)}")
    print(f"Loss: {round(loss, 4)}")

    # Return history for graph plotting
    return history.history

In [8]:
# l=128, k=2 dataset evaluation
X_128 = f128['feature_encodings']
y_128 = f128['label']

# Split the dataset
X_train, y_train, X_test, y_test, X_val, y_val = convert_and_split(X_128, y_128)
print("Data split successfully")

# Train and evaluate the model
results_128 = train_and_evaluate_model(model_128, X_train, y_train, X_test, y_test, X_val, y_val)

Data split successfully
Epoch 1/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 12ms/step - accuracy: 0.8175 - loss: 0.3804 - precision: 0.7569 - recall: 0.8676 - val_accuracy: 0.8313 - val_loss: 0.3564 - val_precision: 0.7750 - val_recall: 0.8744
Epoch 2/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - accuracy: 0.8305 - loss: 0.3575 - precision: 0.7727 - recall: 0.8796 - val_accuracy: 0.8332 - val_loss: 0.3529 - val_precision: 0.7718 - val_recall: 0.8869
Epoch 3/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - accuracy: 0.8302 - loss: 0.3557 - precision: 0.7720 - recall: 0.8773 - val_accuracy: 0.8330 - val_loss: 0.3517 - val_precision: 0.7738 - val_recall: 0.8824
Epoch 4/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step - accuracy: 0.8309 - loss: 0.3543 - precision: 0.7725 - recall: 0.8792 - val_accuracy: 0.8317 - val_loss: 0.3545 - val_precision: 0.7589 - val_rec

In [9]:
# l=256, k=2 dataset evaluation
X_256_2 = f256_2['feature_encodings']
y_256_2 = f256_2['label']

# Split the dataset
X_train, y_train, X_test, y_test, X_val, y_val = convert_and_split(X_256_2, y_256_2)
print("Data split successfully")

# Train and evaluate the model
results_256_2 = train_and_evaluate_model(model_256, X_train, y_train, X_test, y_test, X_val, y_val)

Data split successfully
Epoch 1/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 25ms/step - accuracy: 0.8201 - loss: 0.3807 - precision: 0.7687 - recall: 0.8513 - val_accuracy: 0.8401 - val_loss: 0.3501 - val_precision: 0.7832 - val_recall: 0.8853
Epoch 2/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 25ms/step - accuracy: 0.8385 - loss: 0.3515 - precision: 0.7871 - recall: 0.8741 - val_accuracy: 0.8394 - val_loss: 0.3485 - val_precision: 0.7701 - val_recall: 0.9107
Epoch 3/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 25ms/step - accuracy: 0.8403 - loss: 0.3496 - precision: 0.7898 - recall: 0.8743 - val_accuracy: 0.8454 - val_loss: 0.3426 - val_precision: 0.7931 - val_recall: 0.8823
Epoch 4/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 25ms/step - accuracy: 0.8410 - loss: 0.3478 - precision: 0.7910 - recall: 0.8739 - val_accuracy: 0.8455 - val_loss: 0.3415 - val_precision: 0.7877 - val_rec

In [10]:
# l=256, k=4 dataset evaluation
X_256_4 = f256_4['feature_encodings']
y_256_4 = f256_4['label']

# Split the dataset
X_train, y_train, X_test, y_test, X_val, y_val = convert_and_split(X_256_4, y_256_4)
print("Data split successfully")

# Train and evaluate the model
results_256_4 = train_and_evaluate_model(model_256, X_train, y_train, X_test, y_test, X_val, y_val)

Data split successfully
Epoch 1/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 25ms/step - accuracy: 0.8350 - loss: 0.3565 - precision: 0.7808 - recall: 0.8759 - val_accuracy: 0.8460 - val_loss: 0.3407 - val_precision: 0.8012 - val_recall: 0.8693
Epoch 2/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 25ms/step - accuracy: 0.8427 - loss: 0.3449 - precision: 0.7911 - recall: 0.8792 - val_accuracy: 0.8449 - val_loss: 0.3420 - val_precision: 0.7873 - val_recall: 0.8921
Epoch 3/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 25ms/step - accuracy: 0.8431 - loss: 0.3443 - precision: 0.7907 - recall: 0.8813 - val_accuracy: 0.8455 - val_loss: 0.3391 - val_precision: 0.7806 - val_recall: 0.9075
Epoch 4/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 25ms/step - accuracy: 0.8440 - loss: 0.3414 - precision: 0.7915 - recall: 0.8804 - val_accuracy: 0.8469 - val_loss: 0.3381 - val_precision: 0.7898 - val_rec

In [11]:
# l=512, k=4 dataset evaluation
X_512_4 = f512_4['feature_encodings']
y_512_4 = f512_4['label']

# Split the dataset
X_train, y_train, X_test, y_test, X_val, y_val = convert_and_split(X_512_4, y_512_4)
print("Data split successfully")

# Train and evaluate the model
results_512_4 = train_and_evaluate_model(model_512, X_train, y_train, X_test, y_test, X_val, y_val)

Data split successfully
Epoch 1/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 55ms/step - accuracy: 0.8236 - loss: 0.3745 - precision: 0.7663 - recall: 0.8703 - val_accuracy: 0.8432 - val_loss: 0.3441 - val_precision: 0.8020 - val_recall: 0.8596
Epoch 2/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 55ms/step - accuracy: 0.8414 - loss: 0.3474 - precision: 0.7879 - recall: 0.8816 - val_accuracy: 0.8445 - val_loss: 0.3431 - val_precision: 0.7786 - val_recall: 0.9084
Epoch 3/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 55ms/step - accuracy: 0.8425 - loss: 0.3438 - precision: 0.7894 - recall: 0.8833 - val_accuracy: 0.8457 - val_loss: 0.3400 - val_precision: 0.8019 - val_recall: 0.8669
Epoch 4/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 55ms/step - accuracy: 0.8436 - loss: 0.3432 - precision: 0.7909 - recall: 0.8817 - val_accuracy: 0.8475 - val_loss: 0.3386 - val_precision: 0.7981 - val

In [12]:
# l=512, k=8 dataset evaluation
X_512_8 = f512_8['feature_encodings']
y_512_8 = f512_8['label']

# Split the dataset
X_train, y_train, X_test, y_test, X_val, y_val = convert_and_split(X_512_8, y_512_8)
print("Data split successfully")

# Train and evaluate the model
results_512_8 = train_and_evaluate_model(model_512, X_train, y_train, X_test, y_test, X_val, y_val)

Data split successfully
Epoch 1/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 55ms/step - accuracy: 0.8389 - loss: 0.3506 - precision: 0.7846 - recall: 0.8798 - val_accuracy: 0.8460 - val_loss: 0.3392 - val_precision: 0.8011 - val_recall: 0.8693
Epoch 2/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 61ms/step - accuracy: 0.8436 - loss: 0.3427 - precision: 0.7905 - recall: 0.8820 - val_accuracy: 0.8473 - val_loss: 0.3372 - val_precision: 0.7889 - val_recall: 0.8965
Epoch 3/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 60ms/step - accuracy: 0.8452 - loss: 0.3390 - precision: 0.7930 - recall: 0.8825 - val_accuracy: 0.8467 - val_loss: 0.3371 - val_precision: 0.8007 - val_recall: 0.8722
Epoch 4/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m152s[0m 59ms/step - accuracy: 0.8440 - loss: 0.3398 - precision: 0.7927 - recall: 0.8812 - val_accuracy: 0.8462 - val_loss: 0.3388 - val_precision: 0.7801 - val

In [13]:
# l=1024 k=8 dataset evaluation
X_1024 = f1024['feature_encodings']
y_1024 = f1024['label']

# Split the dataset
X_train, y_train, X_test, y_test, X_val, y_val = convert_and_split(X_1024, y_1024)
print("Data split successfully")

# Train and evaluate the model
results_1024 = train_and_evaluate_model(model_1024, X_train, y_train, X_test, y_test, X_val, y_val)

Data split successfully
Epoch 1/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 115ms/step - accuracy: 0.8014 - loss: 0.4033 - precision: 0.7518 - recall: 0.8226 - val_accuracy: 0.8367 - val_loss: 0.3507 - val_precision: 0.7823 - val_recall: 0.8765
Epoch 2/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 112ms/step - accuracy: 0.8359 - loss: 0.3554 - precision: 0.7802 - recall: 0.8797 - val_accuracy: 0.8385 - val_loss: 0.3485 - val_precision: 0.7671 - val_recall: 0.9144
Epoch 3/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m287s[0m 112ms/step - accuracy: 0.8393 - loss: 0.3500 - precision: 0.7844 - recall: 0.8821 - val_accuracy: 0.8433 - val_loss: 0.3460 - val_precision: 0.7860 - val_recall: 0.8896
Epoch 4/8
[1m2562/2562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 112ms/step - accuracy: 0.8401 - loss: 0.3490 - precision: 0.7898 - recall: 0.8740 - val_accuracy: 0.8452 - val_loss: 0.3433 - val_precision: 0.7920 -