In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [2]:
# Load the dataset
data_path = 'dataset_full.csv'
df = pd.read_csv(data_path)

In [3]:
# Splitting data into features (X) and target (y)
X = df.drop(columns=['phishing'])
y = df['phishing']

In [4]:
# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# Scaling features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Building the Deep Learning Model
model = Sequential()
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [8]:
# Train the model
history = model.fit(X_train_scaled, y_train, epochs=20, batch_size=64, validation_split=0.2, verbose=1)

Epoch 1/20
[1m776/776[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.8925 - loss: 0.2581 - val_accuracy: 0.9384 - val_loss: 0.1531
Epoch 2/20
[1m776/776[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9338 - loss: 0.1675 - val_accuracy: 0.9465 - val_loss: 0.1349
Epoch 3/20
[1m776/776[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9379 - loss: 0.1530 - val_accuracy: 0.9533 - val_loss: 0.1243
Epoch 4/20
[1m776/776[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9453 - loss: 0.1415 - val_accuracy: 0.9548 - val_loss: 0.1208
Epoch 5/20
[1m776/776[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9488 - loss: 0.1362 - val_accuracy: 0.9566 - val_loss: 0.1156
Epoch 6/20
[1m776/776[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.9500 - loss: 0.1294 - val_accuracy: 0.9554 - val_loss: 0.1172
Epoch 7/20
[1m776/776[0m 

In [9]:
# Evaluate the model on the test set
y_pred_prob = model.predict(X_test_scaled)
y_pred = (y_pred_prob > 0.5).astype(int).reshape(-1)

[1m832/832[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


In [10]:
# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [11]:
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 95.59%


In [12]:
import tensorflow as tf

In [13]:
# Save in SavedModel format (recommended)
model.save('my_model.h5') 
# To save in HDF5 format:
# model.save('my_model.h5')

