In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from gan import GAN
from unified_network import UnifiedNetwork
from data_loader import load_network_traffic_from_txt, load_phishing_emails

phishing_emails, labels = load_phishing_emails('datasets/Phishing_Legitimate_full.csv')
network_traffic = load_network_traffic_from_txt('datasets/Train.txt')

min_samples = min(network_traffic.shape[0], labels.shape[0])
network_traffic = network_traffic[:min_samples]
labels = labels[:min_samples]
unified_network = UnifiedNetwork(input_shape=network_traffic.shape[1], phishing_email_shape=phishing_emails.shape[1])

gan = GAN(generator=unified_network.generator, discriminator=unified_network.discriminator)

# Generate synthetic phishing emails
num_samples = 10
noise_size = 100
threshold = 0.5
synthetic_emails = gan.generate(num_samples=num_samples, noise_size=noise_size)
# synthetic_emails = np.array([np.array(email)])
discriminator_predictions = unified_network.discriminator.predict(synthetic_emails)
binary_predictions = (np.array(discriminator_predictions) > threshold).astype(int)
binary_predictions = binary_predictions.reshape(-1)
real_labels = np.zeros(num_samples)
real_labels = np.array([0] * (num_samples // 2) + [1] * (num_samples - num_samples // 2))

# Print shapes and types to ensure they match
print("Shape of real_labels:", real_labels.shape)
print("Shape of binary_predictions:", binary_predictions.shape)
print("Data type of real_labels:", real_labels.dtype)
print("Data type of binary_predictions:", binary_predictions.dtype)
# Debug: Check unique values to ensure binary format
print("Unique values in real_labels:", np.unique(real_labels))
print("Unique values in binary_predictions:", np.unique(binary_predictions))
# num_samples = 10
# synthetic_emails = unified_network.generate_synthetic_data(num_samples=num_samples, noise_size=100)

# print("Generated Synthetic Phishing Emails:")
# print(synthetic_emails)

if set(np.unique(real_labels)).issubset({0, 1}) and set(np.unique(binary_predictions)).issubset({0, 1}):
    accuracy = accuracy_score(real_labels, binary_predictions)
    precision = precision_score(real_labels, binary_predictions, zero_division=1)
    recall = recall_score(real_labels, binary_predictions, zero_division=1)
    f1 = f1_score(real_labels, binary_predictions, zero_division=1)

    # Print metrics
    print(f"GAN Demo for Phishing Email Generation:")
    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")
else:
    print("Error: Non-binary labels detected. Check real_labels and binary_predictions.")
# accuracy = accuracy_score(real_labels, binary_predictions)
# precision = precision_score(real_labels, binary_predictions)
# recall = recall_score(real_labels, binary_predictions)
# f1 = f1_score(real_labels, binary_predictions)
# Calculate statistics for real and generated data
# real_mean = np.mean(phishing_emails, axis=0)
# real_std = np.std(phishing_emails, axis=0)

generated_mean = np.mean(synthetic_emails, axis=0)
generated_std = np.std(synthetic_emails, axis=0)
print(f"Real Data Mean: {real_mean[:5]}")
print(f"Generated Data Mean: {generated_mean[:5]}")
print(f"Real Data Std: {real_std[:5]}")
print(f"Generated Data Std: {generated_std[:5]}")



Shape of real_labels: (10,)
Shape of binary_predictions: (10,)
Data type of real_labels: int64
Data type of binary_predictions: int64
Unique values in real_labels: [0 1]
Unique values in binary_predictions: [0 1]
GAN Demo for Phishing Email Generation:
Accuracy: 0.5000, Precision: 0.5000, Recall: 0.6000, F1-Score: 0.5455


NameError: name 'real_mean' is not defined