# Multi-Layer Perceptron Neural Network

In [None]:
import random

import numpy as np
import pandas as pd
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input
from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Load the phishing-URL dataset and pull out the URL and label columns
df = pd.read_csv('phishing_site_urls.csv')
urls, labels = df['URL'], df['Label']

# Encode the textual labels as integers ('good' -> 1, 'bad' -> 0);
# Series.map leaves NaN for any label that is missing from the mapping
label_mapping = {
    'good': 1,
    'bad': 0,
}
y = labels.map(label_mapping).values

# Break each URL into runs of word characters and re-join them with spaces,
# so CountVectorizer receives one whitespace-separated "document" per URL
tokenizer = RegexpTokenizer(r'\w+')
url_tokens = list(map(' '.join, map(tokenizer.tokenize, urls)))

# Token-count features, converted from the sparse result to a dense array.
# NOTE(review): the dense array holds one cell per (URL, vocabulary term);
# presumably fine for this dataset's size, but confirm it fits in memory.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(url_tokens).toarray()

# Per-run metric accumulators, appended to by the evaluation loop
accuracies, roc_auc_scores, f1_scores = [], [], []

# Define the MLP model structure
def create_mlp_model(input_dim, *, hidden_units=(10, 10), learning_rate=0.01):
    """Build and compile a small fully connected binary classifier.

    The network is an explicit input layer, one ReLU ``Dense`` layer per
    entry in ``hidden_units``, and a single sigmoid output unit. It is
    compiled with Adam and binary cross-entropy, tracking accuracy.

    Args:
        input_dim: Number of features in each input row.
        hidden_units: Width of each hidden layer, in order. The default
            ``(10, 10)`` reproduces the original two 10-unit layers.
        learning_rate: Step size passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model, ready for ``fit``.
    """
    # Declare the input with an explicit Input layer rather than passing
    # ``input_dim`` to the first Dense layer, which newer Keras warns against.
    layers = [Input(shape=(input_dim,))]
    layers.extend(Dense(units, activation='relu') for units in hidden_units)
    layers.append(Dense(1, activation='sigmoid'))  # Probability of class 1

    model = Sequential(layers)
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Train and evaluate 15 MLP models, each on its own random 80/20 split
for i in range(15):
    # Reset Keras' global state before building a new model; when models are
    # created in a loop this state otherwise accumulates and memory grows.
    clear_session()

    # Split data into training and testing sets (new random seed every run)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random.randint(0, 1000)
    )

    # Create and train the MLP model
    model = create_mlp_model(X_train.shape[1])
    model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)

    # Predict on the test set. predict is silenced like fit so its progress
    # bar does not interleave with the per-iteration summary printed below.
    y_pred_proba = model.predict(X_test, verbose=0).flatten()  # 1-D scores for ROC AUC
    y_pred = (y_pred_proba > 0.5).astype(int)  # Threshold scores into 0/1 labels

    # Calculate metrics: ROC AUC uses the raw scores, the others the labels
    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)
    f1 = f1_score(y_test, y_pred)

    # Append metrics to lists
    accuracies.append(accuracy)
    roc_auc_scores.append(roc_auc)
    f1_scores.append(f1)

    print(f"Iteration {i+1}: Accuracy = {accuracy:.4f}, ROC AUC = {roc_auc:.4f}, F1 = {f1:.4f}")

# Save metrics to separate CSV files (one column each, one row per iteration)
pd.DataFrame({'accuracy': accuracies}).to_csv('MLP_accuracies.csv', index=False)
pd.DataFrame({'roc_auc': roc_auc_scores}).to_csv('MLP_roc_auc.csv', index=False)
pd.DataFrame({'f1_score': f1_scores}).to_csv('MLP_f1_scores.csv', index=False)

print("\nMetrics saved to 'MLP_accuracies.csv', 'MLP_roc_auc.csv', and 'MLP_f1_scores.csv'")
