# DefensorAI: Phishing Detection Notebook

This notebook demonstrates phishing detection using:
- Rule-based algorithms
- Machine Learning (ML) models
- Deep Learning (AI) models
- YARA-based detection


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from src.detection.phishing_detection_algorithm import detect_phishing
from src.detection.phishing_ml_model import train_ml_model, evaluate_ml_model
from src.detection.phishing_ai_model import build_ai_model, train_ai_model, evaluate_ai_model
from src.detection.phishing_rules_loader import compile_yara_rules
from src.preprocessing.phishing_preprocessing import load_phishing_data, preprocess_data


In [None]:
# Rule-based phishing detection: run the heuristic detector over a few
# hand-picked domains and URLs and print each verdict.
sample_domains = [
    "secure-login.bank.com",
    "example.com",
    "phishing.bank.example",
]
sample_urls = [
    "http://secure-login.bank.com",
    "https://safe-site.example.com",
]

# Domains are checked with the detector's 'domain' mode.
print("\n--- Rule-Based Domain Detection ---")
for domain in sample_domains:
    verdict = detect_phishing(domain, 'domain')
    print(f"Domain: {domain}, Result: {verdict}")

# Full URLs are checked with the detector's 'url' mode.
print("\n--- Rule-Based URL Detection ---")
for url in sample_urls:
    verdict = detect_phishing(url, 'url')
    print(f"URL: {url}, Result: {verdict}")


In [None]:
# Prepare features and labels
#
# `domains` is expected to be a table (indexed below by a list of column
# names) that already holds the engineered feature columns. No visible cell
# creates it, so fail early with an actionable message instead of an opaque
# NameError on the indexing line.
# NOTE(review): presumably `domains` should come from load_phishing_data() /
# preprocess_data(), which are imported but never called — confirm their
# signatures and add a loading cell before this one.
if "domains" not in globals():
    raise NameError(
        "`domains` is not defined: load and preprocess the phishing dataset "
        "(see load_phishing_data / preprocess_data) before running this cell."
    )

X = domains[['length', 'num_dots', 'has_suspicious_keywords']]
# NOTE(review): every sample is labelled 1, so the training set contains a
# single class. Some estimators reject single-class targets and any reported
# accuracy is trivially perfect — add benign samples labelled 0 for a
# meaningful evaluation.
y = [1] * len(domains)  # Assuming all are phishing samples

# Split data: hold out 20% for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train ML model
ml_model = train_ml_model(X_train, y_train)

# Evaluate ML model on the held-out split
print("\n--- ML Model Evaluation ---")
evaluate_ml_model(ml_model, X_test, y_test)


In [None]:
# Deep-learning model: build it for the current feature width, fit it on the
# training split, then evaluate it on the held-out split.

# The model's input dimension is the number of feature columns in X_train.
input_dim = X_train.shape[1]

ai_model = build_ai_model(input_dim=input_dim)
train_ai_model(ai_model, X_train, y_train)

# Evaluate AI model on the same test split used for the ML model.
print("\n--- AI Model Evaluation ---")
evaluate_ai_model(ai_model, X_test, y_test)


In [None]:
# YARA-based detection: compile the project's rule set once, then scan each
# sample string with it.
yara_rules = compile_yara_rules()

# Samples to scan: two bare domains and one full URL.
sample_data = [
    "secure-login.bank.com",
    "example.com",
    "http://secure-login.bank.com",
]

print("\n--- YARA-Based Detection ---")
for sample in sample_data:
    # Scan the in-memory string (passed via `data=`) rather than a file.
    matches = yara_rules.match(data=sample)
    print(f"Data: {sample}, Matches: {matches}")
