<a href="https://colab.research.google.com/github/Varshiga-tn7/Projects/blob/main/GRU_with_BMA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Data Preprocessing**

In [4]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

# Step 1: Load the dataset as a CSV file
file_path = 'dataset_phishing.csv.xls'
data = pd.read_csv(file_path)

print("File loaded successfully as CSV!")
print(data.head())  # Display first 5 rows

# Step 2: Handle missing values (if any)
data = data.dropna()
print(f"Dataset after dropping missing values: {data.shape}")

# Step 3: Encode the labels (convert 'benign' and 'malicious' to 0s and 1s)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['status'])  # Convert 'benign'/'malicious' to 0/1

# Step 4: Text feature extraction using TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=500)  # Limit to 500 features
X = vectorizer.fit_transform(data['url']).toarray()

print(f"TF-IDF transformation complete. Feature shape: {X.shape}")

# Step 5: Split into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Data Splitting Complete:")
print(f"Training Data: {X_train.shape}, Testing Data: {X_test.shape}")

# Step 6: Save the preprocessed data
np.save('X_train.npy', X_train)
np.save('X_test.npy', X_test)
np.save('y_train.npy', y_train)
np.save('y_test.npy', y_test)

print("Preprocessing Done! Files saved as X_train.npy, X_test.npy, y_train.npy, y_test.npy.")


File loaded successfully as CSV!
                                                 url     status
0                                   br-icloud.com.br  malicious
1                mp3raid.com/music/krizz_kaliko.html     benign
2                    bopsecrets.org/rexroth/cr/1.htm     benign
3  http://www.garage-pirenne.be/index.php?option=...  malicious
4  http://adventure-nicaragua.net/index.php?optio...  malicious
Dataset after dropping missing values: (211954, 2)
TF-IDF transformation complete. Feature shape: (211954, 500)
Data Splitting Complete:
Training Data: (169563, 500), Testing Data: (42391, 500)
Preprocessing Done! Files saved as X_train.npy, X_test.npy, y_train.npy, y_test.npy.


**Building GRU Model**

In [5]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Step 1: Load the preprocessed data
X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')

print("Preprocessed data loaded successfully.")
print(f"Training data shape: {X_train.shape}, Testing data shape: {X_test.shape}")

# Step 2: Reshape the input for GRU
# GRU expects 3D input: (samples, timesteps, features)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Step 3: Build the GRU model
model = Sequential([
    GRU(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
    Dropout(0.3),  # Prevent overfitting
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification output
])

# Step 4: Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 5: Train the GRU model
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

print("Training GRU model...")
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=1
)

# Step 6: Evaluate the model
print("\nEvaluating the model on test data...")
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Preprocessed data loaded successfully.
Training data shape: (169563, 500), Testing data shape: (42391, 500)


  super().__init__(**kwargs)


Training GRU model...
Epoch 1/20
[1m2120/2120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 8ms/step - accuracy: 0.9138 - loss: 0.2314 - val_accuracy: 0.9503 - val_loss: 0.1261
Epoch 2/20
[1m2120/2120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 8ms/step - accuracy: 0.9496 - loss: 0.1276 - val_accuracy: 0.9543 - val_loss: 0.1207
Epoch 3/20
[1m2120/2120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 7ms/step - accuracy: 0.9536 - loss: 0.1195 - val_accuracy: 0.9564 - val_loss: 0.1157
Epoch 4/20
[1m2120/2120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 9ms/step - accuracy: 0.9566 - loss: 0.1167 - val_accuracy: 0.9590 - val_loss: 0.1135
Epoch 5/20
[1m2120/2120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 8ms/step - accuracy: 0.9586 - loss: 0.1121 - val_accuracy: 0.9592 - val_loss: 0.1105
Epoch 6/20
[1m2120/2120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.9589 - loss: 0.1095 - val_accuracy: 0.9602 - val_l

**GRU with Binary Monkey Optimization Algorithm**

In [6]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout
from sklearn.model_selection import train_test_split

# Load preprocessed data
X_train = np.load('X_train.npy')
X_test = np.load('X_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')

# Reshape data for GRU
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the BNA Hyperparameter Optimization Function
def evaluate_model(params):
    gru_units, dropout_rate, batch_size, learning_rate = params

    # Convert binary params to actual values
    gru_units = [32, 64, 128][gru_units]
    dropout_rate = [0.2, 0.3, 0.4][dropout_rate]
    batch_size = [32, 64, 128][batch_size]
    learning_rate = [0.001, 0.01, 0.1][learning_rate]

    # Build the GRU model
    model = Sequential([
        GRU(gru_units, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout_rate),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    # Compile model
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    # Train model
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=5, verbose=0)

    # Evaluate model
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

# BNA Algorithm Implementation
def binary_monkey_algorithm(population_size, iterations):
    # Initialize population (random binary hyperparameters)
    population = np.random.randint(0, 2, size=(population_size, 4))  # 4 hyperparameters
    best_solution = None
    best_fitness = 0

    for iteration in range(iterations):
        print(f"Iteration {iteration+1}/{iterations}")
        for i, monkey in enumerate(population):
            fitness = evaluate_model(monkey)
            print(f"  Monkey {i+1}: {monkey} -> Accuracy: {fitness:.4f}")

            # Update best solution
            if fitness > best_fitness:
                best_fitness = fitness
                best_solution = monkey.copy()

        # Monkeys "move" by randomly flipping binary values
        for monkey in population:
            for j in range(len(monkey)):
                if np.random.rand() < 0.3:  # Flip probability
                    monkey[j] = 1 - monkey[j]

    print("\nBest Solution Found:", best_solution)
    print("Best Accuracy:", best_fitness)
    return best_solution

# Run BNA to Optimize GRU Hyperparameters
best_params = binary_monkey_algorithm(population_size=5, iterations=10)

# Decode best hyperparameters
gru_units = [32, 64, 128][best_params[0]]
dropout_rate = [0.2, 0.3, 0.4][best_params[1]]
batch_size = [32, 64, 128][best_params[2]]
learning_rate = [0.001, 0.01, 0.1][best_params[3]]

print("\nOptimized Hyperparameters:")
print(f"GRU Units: {gru_units}")
print(f"Dropout Rate: {dropout_rate}")
print(f"Batch Size: {batch_size}")
print(f"Learning Rate: {learning_rate}")

# Retrain GRU Model with Optimized Hyperparameters
model = Sequential([
    GRU(gru_units, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(dropout_rate),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

print("\nRetraining GRU Model with Optimized Hyperparameters...")
model.fit(X_train, y_train, batch_size=batch_size, epochs=10, verbose=1)

# Final Evaluation
print("\nEvaluating Optimized Model...")
final_loss, final_accuracy = model.evaluate(X_test, y_test)
print(f"Final Test Loss: {final_loss:.4f}")
print(f"Final Test Accuracy: {final_accuracy:.4f}")


Iteration 1/10
  Monkey 1: [0 0 1 1] -> Accuracy: 0.9655
  Monkey 2: [0 0 0 0] -> Accuracy: 0.9623
  Monkey 3: [1 1 0 1] -> Accuracy: 0.9658
  Monkey 4: [1 1 0 1] -> Accuracy: 0.9657
  Monkey 5: [1 0 0 0] -> Accuracy: 0.9632
Iteration 2/10
  Monkey 1: [0 0 0 0] -> Accuracy: 0.9626
  Monkey 2: [0 0 0 0] -> Accuracy: 0.9624
  Monkey 3: [1 1 0 0] -> Accuracy: 0.9612
  Monkey 4: [1 1 0 0] -> Accuracy: 0.9623
  Monkey 5: [1 0 0 1] -> Accuracy: 0.9655
Iteration 3/10
  Monkey 1: [0 0 0 0] -> Accuracy: 0.9620
  Monkey 2: [0 0 0 0] -> Accuracy: 0.9613
  Monkey 3: [1 0 1 1] -> Accuracy: 0.9649
  Monkey 4: [1 1 0 0] -> Accuracy: 0.9627
  Monkey 5: [1 0 0 1] -> Accuracy: 0.9660
Iteration 4/10
  Monkey 1: [1 0 0 0] -> Accuracy: 0.9628
  Monkey 2: [0 0 0 0] -> Accuracy: 0.9619
  Monkey 3: [0 0 1 1] -> Accuracy: 0.9654
  Monkey 4: [1 0 0 0] -> Accuracy: 0.9630
  Monkey 5: [1 1 0 1] -> Accuracy: 0.9652
Iteration 5/10
  Monkey 1: [0 0 0 0] -> Accuracy: 0.9610
  Monkey 2: [1 0 0 0] -> Accuracy: 0.9625
 

**Saving The Model**

In [7]:
# Save the trained model
model.save('gru_phishing_model.h5')
print("Model saved successfully.")




Model saved successfully.
