In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the dataset
file_path = "Dataset-2.xlsx"  # Ensure this file is uploaded to Colab
# Open the workbook in a context manager so the underlying file handle is
# released as soon as the sheet has been read into memory.
with pd.ExcelFile(file_path) as xls:
    df = pd.read_excel(xls, sheet_name='Worksheet')


In [None]:
# Preprocessing
# Encode the target: 'legitimate' -> 0, 'phishing' -> 1. Any other value maps
# to NaN, so fail loudly here instead of feeding NaN labels to the model.
status_codes = df['status'].map({'legitimate': 0, 'phishing': 1})
if status_codes.isna().any():
    unexpected = df.loc[status_codes.isna(), 'status'].unique().tolist()
    raise ValueError(
        f"Unexpected 'status' labels (was this cell already run?): {unexpected}"
    )

# Build the cleaned frame in a single expression and rebind `df` only once it
# succeeds, so a failure part-way through never leaves `df` half-processed.
df = (
    df.assign(status=status_codes)
      .drop(columns=['url'])   # raw URL text is not used as a feature
      .drop_duplicates()       # original row index is kept, not reset
)

In [None]:
# Columns kept for modelling: thirteen predictors followed by the 'status' target.
selected_features = [
    'length_url',
    'nb_dots',
    'nb_hyphens',
    'https_token',
    'prefix_suffix',
    'random_domain',
    'shortening_service',
    'nb_redirection',
    'web_traffic',
    'dns_record',
    'google_index',
    'page_rank',
    'domain_age',
    'status',
]
# Restrict the frame to those columns, in the order listed above.
df_selected = df.loc[:, selected_features]

In [None]:
# Separate the predictors (X) from the 'status' target (y).
feature_columns = [col for col in df_selected.columns if col != 'status']
X = df_selected[feature_columns]
y = df_selected['status']

In [None]:
# Train-test split
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows' min/max values influence the training features (data leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize the data
# Learn the per-feature min/max from the training rows only, then apply the
# same transform to the test rows (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Append a trailing length-1 axis so each sample becomes
# (n_features, 1): one "timestep" per feature, one channel per step.
n_train, n_steps = X_train.shape[:2]
X_train = X_train.reshape(n_train, n_steps, 1)

n_test, n_steps_test = X_test.shape[:2]
X_test = X_test.reshape(n_test, n_steps_test, 1)

In [None]:
# Build the GRU model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable on "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

gru_model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape`
    # to the first layer is what triggers the Keras UserWarning.
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    GRU(64, return_sequences=True),  # emits the full sequence for the next GRU
    Dropout(0.3),
    GRU(32),                         # emits only the final state
    Dropout(0.3),
    Dense(1, activation='sigmoid')   # single phishing-probability output
])

  super().__init__(**kwargs)


In [None]:
# Configure training: Adam (learning rate 0.001), binary cross-entropy loss,
# accuracy reported as the metric.
gru_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit for 20 epochs with mini-batches of 32.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* numbers are computed on the same rows used for the final
# evaluation — confirm this is intended.
gru_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test, y_test),
)

Epoch 1/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 23ms/step - accuracy: 0.7165 - loss: 0.5571 - val_accuracy: 0.8419 - val_loss: 0.3740
Epoch 2/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - accuracy: 0.8780 - loss: 0.3365 - val_accuracy: 0.8597 - val_loss: 0.3525
Epoch 3/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 23ms/step - accuracy: 0.8913 - loss: 0.3053 - val_accuracy: 0.8699 - val_loss: 0.3426
Epoch 4/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.8865 - loss: 0.3073 - val_accuracy: 0.8761 - val_loss: 0.3340
Epoch 5/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - accuracy: 0.8813 - loss: 0.3138 - val_accuracy: 0.8743 - val_loss: 0.3287
Epoch 6/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.8837 - loss: 0.3145 - val_accuracy: 0.8717 - val_loss: 0.3270
Epoch 7/20
[1m282

<keras.src.callbacks.history.History at 0x78c9e7972cd0>

In [None]:
# Score the trained model on the held-out split; index 0 of the result is the
# loss and index 1 is the accuracy metric configured at compile time.
eval_results = gru_model.evaluate(x=X_test, y=y_test)
print(f"\nTest Loss: {eval_results[0]:.4f}, Test Accuracy: {eval_results[1]:.4f}")


[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9050 - loss: 0.2723

Test Loss: 0.2778, Test Accuracy: 0.9014


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Conv1D, Flatten, Dense, Dropout, MaxPooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Load the dataset
file_path = "Dataset-2.xlsx"  # Ensure this file is uploaded to Colab
# Open the workbook in a context manager so the underlying file handle is
# released as soon as the sheet has been read into memory.
with pd.ExcelFile(file_path) as xls:
    df = pd.read_excel(xls, sheet_name='Worksheet')

In [None]:
# Preprocessing
# Encode the target: 'legitimate' -> 0, 'phishing' -> 1. Any other value maps
# to NaN, so fail loudly here instead of feeding NaN labels to the model.
status_codes = df['status'].map({'legitimate': 0, 'phishing': 1})
if status_codes.isna().any():
    unexpected = df.loc[status_codes.isna(), 'status'].unique().tolist()
    raise ValueError(
        f"Unexpected 'status' labels (was this cell already run?): {unexpected}"
    )

# Build the cleaned frame in a single expression and rebind `df` only once it
# succeeds, so a failure part-way through never leaves `df` half-processed.
df = (
    df.assign(status=status_codes)
      .drop(columns=['url'])   # raw URL text is not used as a feature
      .drop_duplicates()       # original row index is kept, not reset
)


In [None]:
# Columns kept for modelling: thirteen predictors followed by the 'status' target.
selected_features = [
    'length_url',
    'nb_dots',
    'nb_hyphens',
    'https_token',
    'prefix_suffix',
    'random_domain',
    'shortening_service',
    'nb_redirection',
    'web_traffic',
    'dns_record',
    'google_index',
    'page_rank',
    'domain_age',
    'status',
]
# Restrict the frame to those columns, in the order listed above.
df_selected = df.loc[:, selected_features]


In [None]:
# Separate the predictors (X) from the 'status' target (y).
feature_columns = [col for col in df_selected.columns if col != 'status']
X = df_selected[feature_columns]
y = df_selected['status']

In [None]:
# Train-test split
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows' min/max values influence the training features (data leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize the data
# Learn the per-feature min/max from the training rows only, then apply the
# same transform to the test rows (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Append a trailing length-1 axis so each sample becomes (n_features, 1),
# the 3-D layout that both the GRU and Conv1D layers are given below.
n_train, n_steps = X_train.shape[:2]
X_train = X_train.reshape(n_train, n_steps, 1)

n_test, n_steps_test = X_test.shape[:2]
X_test = X_test.reshape(n_test, n_steps_test, 1)

In [None]:
# Build the GRU model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable on "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

gru_model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape`
    # to the first layer is what triggers the Keras UserWarning.
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    GRU(64, return_sequences=True),  # emits the full sequence for the next GRU
    Dropout(0.3),
    GRU(32),                         # emits only the final state
    Dropout(0.3),
    Dense(1, activation='sigmoid')   # single phishing-probability output
])

  super().__init__(**kwargs)


In [None]:
# Configure GRU training: Adam (learning rate 0.001), binary cross-entropy
# loss, accuracy reported as the metric.
gru_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the GRU for 20 epochs with mini-batches of 32.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* numbers are computed on the same rows used for the final
# evaluation — confirm this is intended.
gru_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test, y_test),
)

Epoch 1/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 22ms/step - accuracy: 0.7593 - loss: 0.5541 - val_accuracy: 0.8490 - val_loss: 0.3733
Epoch 2/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.8730 - loss: 0.3398 - val_accuracy: 0.8668 - val_loss: 0.3541
Epoch 3/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 21ms/step - accuracy: 0.8908 - loss: 0.3195 - val_accuracy: 0.8708 - val_loss: 0.3413
Epoch 4/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.8883 - loss: 0.3129 - val_accuracy: 0.8579 - val_loss: 0.3431
Epoch 5/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 23ms/step - accuracy: 0.8930 - loss: 0.2961 - val_accuracy: 0.8757 - val_loss: 0.3211
Epoch 6/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.8851 - loss: 0.3087 - val_accuracy: 0.8774 - val_loss: 0.3136
Epoch 7/20
[1m282/

<keras.src.callbacks.history.History at 0x78c9e697d210>

In [None]:
# Score the trained GRU on the held-out split; index 0 of the result is the
# loss and index 1 is the accuracy metric configured at compile time.
eval_results_gru = gru_model.evaluate(x=X_test, y=y_test)
print(f"\nGRU Test Loss: {eval_results_gru[0]:.4f}, Test Accuracy: {eval_results_gru[1]:.4f}")

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8994 - loss: 0.2813

GRU Test Loss: 0.2798, Test Accuracy: 0.8988


In [None]:
# Build the CNN model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable on "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

cnn_model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape`
    # to the first layer is what triggers the Keras UserWarning.
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    # Two conv -> pool -> dropout stages over the feature axis.
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    Conv1D(32, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    # Collapse the remaining (steps, channels) map for the dense head.
    Flatten(),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')   # single phishing-probability output
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure CNN training: Adam (learning rate 0.001), binary cross-entropy
# loss, accuracy reported as the metric.
cnn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the CNN for 20 epochs with mini-batches of 32.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* numbers are computed on the same rows used for the final
# evaluation — confirm this is intended.
cnn_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test, y_test),
)


Epoch 1/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.5817 - loss: 0.6808 - val_accuracy: 0.6590 - val_loss: 0.6478
Epoch 2/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6284 - loss: 0.6504 - val_accuracy: 0.6359 - val_loss: 0.6323
Epoch 3/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6402 - loss: 0.6356 - val_accuracy: 0.6532 - val_loss: 0.6197
Epoch 4/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6649 - loss: 0.6233 - val_accuracy: 0.6781 - val_loss: 0.6093
Epoch 5/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6696 - loss: 0.6137 - val_accuracy: 0.6781 - val_loss: 0.6038
Epoch 6/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6816 - loss: 0.6085 - val_accuracy: 0.6727 - val_loss: 0.6077
Epoch 7/20
[1m282/282[0m 

<keras.src.callbacks.history.History at 0x78c9de8dd550>

In [None]:
# Score the trained CNN on the held-out split; index 0 of the result is the
# loss and index 1 is the accuracy metric configured at compile time.
eval_results_cnn = cnn_model.evaluate(x=X_test, y=y_test)
print(f"\nCNN Test Loss: {eval_results_cnn[0]:.4f}, Test Accuracy: {eval_results_cnn[1]:.4f}")


[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7409 - loss: 0.5349

CNN Test Loss: 0.5490, Test Accuracy: 0.7340


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Conv1D, Flatten, Dense, Dropout, MaxPooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the dataset
file_path = "Dataset-2.xlsx"  # Ensure this file is uploaded to Colab
# Open the workbook in a context manager so the underlying file handle is
# released as soon as the sheet has been read into memory.
with pd.ExcelFile(file_path) as xls:
    df = pd.read_excel(xls, sheet_name='Worksheet')

In [None]:
# Preprocessing
# Encode the target: 'legitimate' -> 0, 'phishing' -> 1. Any other value maps
# to NaN, so fail loudly here instead of feeding NaN labels to the model.
status_codes = df['status'].map({'legitimate': 0, 'phishing': 1})
if status_codes.isna().any():
    unexpected = df.loc[status_codes.isna(), 'status'].unique().tolist()
    raise ValueError(
        f"Unexpected 'status' labels (was this cell already run?): {unexpected}"
    )

# Build the cleaned frame in a single expression and rebind `df` only once it
# succeeds, so a failure part-way through never leaves `df` half-processed.
df = (
    df.assign(status=status_codes)
      .drop(columns=['url'])   # raw URL text is not used as a feature
      .drop_duplicates()       # original row index is kept, not reset
)


In [None]:
# Columns kept for modelling: thirteen predictors followed by the 'status' target.
selected_features = [
    'length_url',
    'nb_dots',
    'nb_hyphens',
    'https_token',
    'prefix_suffix',
    'random_domain',
    'shortening_service',
    'nb_redirection',
    'web_traffic',
    'dns_record',
    'google_index',
    'page_rank',
    'domain_age',
    'status',
]
# Restrict the frame to those columns, in the order listed above.
df_selected = df.loc[:, selected_features]

In [None]:
# Separate the predictors (X) from the 'status' target (y).
feature_columns = [col for col in df_selected.columns if col != 'status']
X = df_selected[feature_columns]
y = df_selected['status']


In [None]:
# Train-test split
# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows' min/max values influence the training features (data leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize the data
# Learn the per-feature min/max from the training rows only, then apply the
# same transform to the test rows (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Append a trailing length-1 axis so each sample becomes (n_features, 1),
# the 3-D layout the Conv1D/GRU stack below is given.
n_train, n_steps = X_train.shape[:2]
X_train = X_train.reshape(n_train, n_steps, 1)

n_test, n_steps_test = X_test.shape[:2]
X_test = X_test.reshape(n_test, n_steps_test, 1)

In [None]:
# Build the Hybrid CNN-GRU model
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable on "Restart & Run All".
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

hybrid_model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape`
    # to the first layer is what triggers the Keras UserWarning.
    tf.keras.Input(shape=(X_train.shape[1], 1)),
    # Convolutional front-end extracts local patterns, then pooling halves the steps.
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    # Recurrent back-end consumes the pooled feature sequence.
    GRU(64, return_sequences=True),
    Dropout(0.3),
    GRU(32),                         # emits only the final state: (batch, 32)
    Dropout(0.3),
    # No Flatten() here: the last GRU already returns a 2-D tensor, so
    # flattening it would leave the data unchanged.
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')   # single phishing-probability output
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure hybrid-model training: Adam (learning rate 0.001), binary
# cross-entropy loss, accuracy reported as the metric.
hybrid_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the hybrid model for 20 epochs with mini-batches of 32.
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_* numbers are computed on the same rows used for the final
# evaluation — confirm this is intended.
hybrid_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test, y_test),
)

Epoch 1/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 18ms/step - accuracy: 0.7196 - loss: 0.5513 - val_accuracy: 0.8428 - val_loss: 0.3712
Epoch 2/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - accuracy: 0.8755 - loss: 0.3390 - val_accuracy: 0.8650 - val_loss: 0.3465
Epoch 3/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.8744 - loss: 0.3303 - val_accuracy: 0.8681 - val_loss: 0.3426
Epoch 4/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.8827 - loss: 0.3172 - val_accuracy: 0.8766 - val_loss: 0.3388
Epoch 5/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.8831 - loss: 0.3260 - val_accuracy: 0.8770 - val_loss: 0.3387
Epoch 6/20
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8854 - loss: 0.3125 - val_accuracy: 0.8734 - val_loss: 0.3399
Epoch 7/20
[1m282/28

<keras.src.callbacks.history.History at 0x78c9df32acd0>

In [None]:
# Score the trained hybrid model on the held-out split; index 0 of the result
# is the loss and index 1 is the accuracy metric configured at compile time.
eval_results_hybrid = hybrid_model.evaluate(x=X_test, y=y_test)
print(f"\nHybrid CNN-GRU Test Loss: {eval_results_hybrid[0]:.4f}, Test Accuracy: {eval_results_hybrid[1]:.4f}")

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8798 - loss: 0.3065

Hybrid CNN-GRU Test Loss: 0.3130, Test Accuracy: 0.8788


In [None]:
# Persist all three trained models as .pkl files in the working directory.

import pickle

# (output filename, model object) pairs, written in this order.
for pkl_name, trained_model in (
    ('gru_model.pkl', gru_model),
    ('cnn_model.pkl', cnn_model),
    ('hybrid_model.pkl', hybrid_model),
):
    with open(pkl_name, 'wb') as file:
        pickle.dump(trained_model, file)


In [None]:
import pickle

# Serialize the GRU model and write the bytes to gru_model.pkl.
with open('gru_model.pkl', mode='wb') as gru_out:
    gru_out.write(pickle.dumps(gru_model))


In [None]:
import pickle

# Serialize the CNN model and write the bytes to cnn_model.pkl.
with open('cnn_model.pkl', mode='wb') as cnn_out:
    cnn_out.write(pickle.dumps(cnn_model))


In [None]:
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load dataset
# NOTE(review): this is an absolute path at the filesystem root, whereas the
# earlier cells read "Dataset-2.xlsx" from the working directory — confirm
# which location is intended.
file_path = "/Dataset-2.xlsx"
# Context manager releases the workbook's file handle once the sheet is read.
with pd.ExcelFile(file_path) as xls:
    df = pd.read_excel(xls, sheet_name='Worksheet')

# Preprocessing
# Encode the target: 'legitimate' -> 0, 'phishing' -> 1. Any other value maps
# to NaN, so fail loudly here instead of training on NaN labels.
status_codes = df['status'].map({'legitimate': 0, 'phishing': 1})
if status_codes.isna().any():
    unexpected = df.loc[status_codes.isna(), 'status'].unique().tolist()
    raise ValueError(f"Unexpected 'status' labels: {unexpected}")
df = (
    df.assign(status=status_codes)
      .drop(columns=['url'])   # raw URL text is not used as a feature
      .drop_duplicates()
)

# Selecting key features
selected_features = [
    'length_url', 'nb_dots', 'nb_hyphens', 'https_token', 'prefix_suffix',
    'random_domain', 'shortening_service', 'nb_redirection', 'web_traffic',
    'dns_record', 'google_index', 'page_rank', 'domain_age'
]
X = df[selected_features]
y = df['status']

# Train/test split
# Done before scaling so the scaler never sees the held-out rows, and
# stratified on the label to match the splits used for the neural models.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Normalize data
# Fit min/max on the training rows only; reuse that transform for the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Create the output directory first: open(..., "wb") does not create missing
# parent directories and would raise FileNotFoundError otherwise.
model_dir = Path("model")
model_dir.mkdir(parents=True, exist_ok=True)

# Save the model
with open(model_dir / "rf_model.pkl", "wb") as model_file:
    pickle.dump(rf_model, model_file)

# Save the scaler
with open(model_dir / "scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

print("✅ Model trained and saved successfully!")


FileNotFoundError: [Errno 2] No such file or directory: 'model/rf_model.pkl'