# Phishing Detection Using a Feedforward Neural Network

## Preparing the model

In [25]:
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from feature_extract import extract_features_from_url

In [26]:
# Load the full dataset CSV into a DataFrame.
dataset_path = './dataset/dataset_full.csv'
df = pd.read_csv(dataset_path)

In [27]:
# Every column except the last is treated as a feature; the last column is the label.
feature_frame = df.iloc[:, :-1]
label_series = df.iloc[:, -1]

X = feature_frame.values  # 2-D feature matrix, one row per sample
y = label_series.values   # 1-D label vector, aligned with the rows of X

In [28]:
# Map the raw labels to integer class ids. LabelEncoder numbers the classes in
# sorted order, so check label_encoder.classes_ to see which raw value became 0
# and which became 1.
# NOTE(review): later cells read class 1 as 'bad' and class 0 as 'good' — confirm
# that matches the mapping actually produced here.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)

In [29]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Number of feature columns, taken from the training matrix rather than hard-coded;
# it is used as the width of the network's input.
n_train_samples, input_shape = X_train.shape
input_shape

111

In [31]:
# Rebind the four train/test arrays as float32 TensorFlow tensors.
# The right-hand side reads the current arrays before any name is reassigned.
X_train, y_train, X_test, y_test = (
    tf.convert_to_tensor(array, dtype=tf.float32)
    for array in (X_train, y_train, X_test, y_test)
)

In [32]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable across runs (the train/test
# split is already seeded with 42). Some GPU kernels may still be
# non-deterministic.
tf.keras.utils.set_random_seed(42)

# Simple feed-forward binary classifier:
#   input (one value per feature) -> Dense(64, relu) -> Dense(32, relu) -> Dense(1, sigmoid)
# The input width is declared with an explicit Input object, which is the
# recommended form for Sequential models, instead of an `input_shape=` argument
# on the first Dense layer.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(input_shape,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # single score between 0 and 1
])

In [33]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [34]:
# Training hyperparameters
batch_size = 32
epochs = 10

# Fit on the training tensors; validation_split=0.2 asks Keras to hold back 20%
# of them for per-epoch validation. The call is the last expression, so the
# returned History object is shown as the cell output.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x272569ff190>

In [35]:
# Score the trained model on the held-out test set. With the metrics configured
# at compile time, evaluate() yields the loss followed by the accuracy.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

Test Loss: 0.2606, Test Accuracy: 0.9137


In [36]:
# Make a prediction for one hand-written sample: a single row holding one value
# per model input feature.
new_data = np.array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 13, -1, -1, 0, -1, 1, 1, 1]], dtype=np.float32)  # Replace with your new feature values

# Fail fast with a readable message if the hand-typed row does not have the
# width the model was built for.
assert new_data.shape == (1, input_shape), (
    f'expected shape (1, {input_shape}), got {new_data.shape}'
)

# predictions holds one row per input sample.
predictions = model.predict(new_data)



In [37]:
# Convert the predicted scores back to 'good' or 'bad'.
# `predictions` is 2-D (one row per sample, one score per row), so it is
# flattened to plain scalars before thresholding rather than relying on the
# implicit truth value of one-element arrays.
# Scores below 0.5 are reported as 'good', everything else as 'bad'.
predicted_labels = ['good' if p < 0.5 else 'bad' for p in predictions.ravel()]

print(f'Predicted Label: {predicted_labels[0]}')


Predicted Label: good


In [38]:
# Save the model to a .keras archive file.
MODEL_PATH = './neuralnetwork-model/phishing_updated.keras'

# Create the target directory first so saving also works on a fresh checkout
# where it does not exist yet.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
model.save(MODEL_PATH)

In [39]:
# Per-class evaluation on the test set.
y_pred = model.predict(X_test)

# Threshold the predicted scores at 0.5 to obtain hard 0/1 class predictions.
is_positive = y_pred >= 0.5
y_pred_binary = is_positive.astype(int)

# Confusion matrix: rows are the true classes, columns the predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred_binary)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Precision, recall, F1 and support for each class.
classification_rep = classification_report(y_test, y_pred_binary)
print("Classification Report:", classification_rep, sep="\n")

  1/555 [..............................] - ETA: 15s

Confusion Matrix:
[[10795   817]
 [  713  5405]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.94      0.93      0.93     11612
         1.0       0.87      0.88      0.88      6118

    accuracy                           0.91     17730
   macro avg       0.90      0.91      0.90     17730
weighted avg       0.91      0.91      0.91     17730



## Test the model

In [46]:
# Restore the trained network from its .keras archive; this rebinds `model`.
saved_model_path = './neuralnetwork-model/phishing_updated.keras'
model = tf.keras.models.load_model(saved_model_path)

In [47]:
# Turn a URL into the feature vector the model consumes.
# NOTE(review): "googel.com" looks like a deliberate typosquat of google.com
# used as a test case — confirm.
url = "googel.com"
features = extract_features_from_url(url)
print(features)


[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, -1, -1, -1, 0, -1, 1, 1, 1]


In [48]:
# Wrap the single feature vector in a batch of one row for model.predict.
new_data = np.array([features], dtype=np.float32)
predictions = model.predict(new_data)

# `predictions` is 2-D (one row per sample), so flatten it to scalars before
# thresholding instead of relying on the implicit truth value of one-element
# arrays. Scores below 0.5 are reported as 'good', everything else as 'bad'.
predicted_labels = ['good' if p < 0.5 else 'bad' for p in predictions.ravel()]
print(f'Predicted Label: {predicted_labels[0]}')

# Also show the raw score so borderline cases are visible.
print(predictions)

Predicted Label: good
[[0.38336068]]
