In [None]:
!pip install numpy==1.24.3 scikit-learn==1.2.2


Collecting numpy==1.24.3
  Downloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting scikit-learn==1.2.2
  Downloading scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading numpy-1.24.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m81.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m101.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, scikit-learn
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.6.1
 

In [None]:
import joblib
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load model parameters with compatibility check
def load_parameters(file_path):
    """Load model parameters and extract only numerical parameters."""
    try:
        model = joblib.load(file_path)

        # Extract coefficients or parameters
        if hasattr(model, 'coef_'):
            params = model.coef_.flatten()
        elif hasattr(model, 'get_params'):
            params = np.array(list(model.get_params().values())).flatten()
        else:
            raise ValueError(f"Unsupported model format: {file_path}")

        # Filter only numerical parameters
        params = np.array([p for p in params if isinstance(p, (int, float, np.number))])

        if len(params) == 0:
            print(f"⚠️ No numerical parameters found in {file_path}. Skipping.")
            return None

        return params

    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

# Paths to pkl files
file_paths = [
    'random_forest_model.pkl',
    'lightgbm_model.pkl',
    'model.pkl'
]

# Load all parameter vectors
parameter_vectors = [load_parameters(fp) for fp in file_paths]
parameter_vectors = [vec for vec in parameter_vectors if vec is not None]

# Ensure consistent dimensions by padding or truncating
max_length = max(len(vec) for vec in parameter_vectors)

def pad_or_truncate(vec, length):
    """Pad or truncate vectors to the same length."""
    if len(vec) < length:
        # Pad with zeros
        return np.pad(vec, (0, length - len(vec)), 'constant')
    else:
        # Truncate
        return vec[:length]

# Standardize all parameter vectors
parameter_vectors = [pad_or_truncate(vec, max_length) for vec in parameter_vectors]

# Check if all models failed
if not parameter_vectors:
    print("❌ No valid models loaded. Check compatibility and formats.")
    exit()

# Initialize reputation scores
reputation_scores = np.ones(len(parameter_vectors))

# Display initial parameter vectors
print("\n--- Initial Parameter Vectors ---")
for i, vec in enumerate(parameter_vectors):
    print(f"Client {i + 1} Parameters: {vec}")

# Multi-Krum aggregation
def multi_krum(updates, num_selected=2):
    """Perform Multi-Krum aggregation with display."""
    num_clients = len(updates)
    scores = []

    print("\n--- Multi-Krum Aggregation ---")

    # Calculate distances
    for i in range(num_clients):
        distances = [np.linalg.norm(updates[i] - updates[j])
                     for j in range(num_clients) if i != j]
        scores.append(sum(sorted(distances)[:num_selected]))
        print(f"Client {i + 1} Distances: {distances}")
        print(f"Client {i + 1} Score: {scores[-1]}")

    # Select the best vectors
    selected_indices = np.argsort(scores)[:num_selected]
    selected_updates = np.array([updates[i] for i in selected_indices])
    print("\nSelected Indices:", selected_indices)
    print("Selected Updates:")
    for idx in selected_indices:
        print(f"Client {idx + 1}: {updates[idx]}")

    return np.mean(selected_updates, axis=0)

# Reputation-based weighting
def reputation_weighting(updates, reputation_scores):
    """Apply reputation-based weighting with display."""
    print("\n--- Reputation-Based Weighting ---")

    weighted_updates = [
        update * rep for update, rep in zip(updates, reputation_scores)
    ]

    for i, (update, weight) in enumerate(zip(updates, reputation_scores)):
        print(f"Client {i + 1} Weighted Parameters: {update * weight}")

    aggregated = np.mean(weighted_updates, axis=0)
    print("\nWeighted Aggregated Parameters:", aggregated)
    return aggregated

# Update reputation scores
def update_reputation(updates, aggregated_update, reputation_scores, threshold=0.9, decay=0.95, boost=1.05):
    """Update reputation scores based on cosine similarity with display."""
    print("\n--- Reputation Update ---")

    similarities = [cosine_similarity([update], [aggregated_update])[0][0] for update in updates]

    for i, sim in enumerate(similarities):
        if sim < threshold:
            reputation_scores[i] *= decay  # Reduce score for deviation
            print(f"Client {i + 1}: Similarity = {sim:.4f} (Reduced Reputation)")
        else:
            reputation_scores[i] *= boost  # Increase score for valid contribution
            print(f"Client {i + 1}: Similarity = {sim:.4f} (Increased Reputation)")

    # Keep scores in range [0.01, 1.0]
    reputation_scores = np.clip(reputation_scores, 0.01, 1.0)
    print("Updated Reputation Scores:", reputation_scores)

    return reputation_scores

# Perform aggregation over multiple rounds
num_rounds = 5

for round in range(num_rounds):
    print(f"\n=== Round {round + 1} ===")

    # Step 1: Multi-Krum aggregation
    aggregated_update = multi_krum(parameter_vectors)

    # Step 2: Reputation-weighted aggregation
    final_update = reputation_weighting(parameter_vectors, reputation_scores)

    # Step 3: Update reputation scores
    reputation_scores = update_reputation(parameter_vectors, aggregated_update, reputation_scores)

    print(f"\nFinal Aggregated Parameters (Round {round + 1}):")
    print(final_update)

print("\n=== Final Result After All Rounds ===")
print("Final Reputation Scores:", reputation_scores)
print("Final Aggregated Parameters:", final_update)


Error loading lightgbm_model.pkl: No module named 'numpy._core'

--- Initial Parameter Vectors ---
Client 1 Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.   0.   0.   0.]
Client 2 Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.  42.   0.   0.]

=== Round 1 ===

--- Multi-Krum Aggregation ---
Client 1 Distances: [42.0]
Client 1 Score: 42.0
Client 2 Distances: [42.0]
Client 2 Score: 42.0

Selected Indices: [0 1]
Selected Updates:
Client 1: [  1.   0.   0.   1.   2.   0. 100.   0.   0.   0.   0.]
Client 2: [  1.   0.   0.   1.   2.   0. 100.   0.  42.   0.   0.]

--- Reputation-Based Weighting ---
Client 1 Weighted Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.   0.   0.   0.]
Client 2 Weighted Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.  42.   0.   0.]

Weighted Aggregated Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.  21.   0.   0.]

--- Reputation Update ---
Client 1: Similarity = 0.9787 (Increased Reputation)
Client 2: Similarity = 0.9819 (Increased Re

In [None]:
# Save the final aggregated model parameters
output_path = 'aggregated_model.pkl'

# Save the final aggregated parameters using joblib
joblib.dump(final_update, output_path)
print(f"\n✅ Aggregated model saved successfully at: {output_path}")



✅ Aggregated model saved successfully at: aggregated_model.pkl


In [12]:
import joblib
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os

# Load model parameters with compatibility check
def load_parameters(file_path):
    """Load model parameters and extract only numerical parameters."""
    try:
        model = joblib.load(file_path)

        # Extract coefficients or parameters
        if hasattr(model, 'coef_'):
            params = model.coef_.flatten()
        elif hasattr(model, 'get_params'):
            params = np.array(list(model.get_params().values())).flatten()
        else:
            raise ValueError(f"Unsupported model format: {file_path}")

        # Filter only numerical parameters
        params = np.array([p for p in params if isinstance(p, (int, float, np.number))])

        if len(params) == 0:
            print(f"⚠️ No numerical parameters found in {file_path}. Skipping.")
            return None

        return params

    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

# Paths to pkl files
file_paths = [
    'random_forest_model.pkl',
    'lightgbm_model.pkl',
    'model.pkl'
]

# Load all parameter vectors
parameter_vectors = [load_parameters(fp) for fp in file_paths]
parameter_vectors = [vec for vec in parameter_vectors if vec is not None]

# Ensure consistent dimensions by padding or truncating
if parameter_vectors:
    max_length = max(len(vec) for vec in parameter_vectors)

    def pad_or_truncate(vec, length):
        """Pad or truncate vectors to the same length."""
        if len(vec) < length:
            # Pad with zeros
            return np.pad(vec, (0, length - len(vec)), 'constant')
        else:
            # Truncate
            return vec[:length]

    # Standardize all parameter vectors
    parameter_vectors = [pad_or_truncate(vec, max_length) for vec in parameter_vectors]
else:
    print("❌ No valid models loaded. Check compatibility and formats.")
    exit()

# Initialize reputation scores
reputation_scores = np.ones(len(parameter_vectors))

# Display initial parameter vectors
print("\n--- Initial Parameter Vectors ---")
for i, vec in enumerate(parameter_vectors):
    print(f"Client {i + 1} Parameters: {vec}")

# Multi-Krum aggregation
def multi_krum(updates, num_selected=2):
    """Perform Multi-Krum aggregation with display."""
    num_clients = len(updates)
    scores = []

    print("\n--- Multi-Krum Aggregation ---")

    # Calculate distances
    for i in range(num_clients):
        distances = [np.linalg.norm(updates[i] - updates[j])
                     for j in range(num_clients) if i != j]
        scores.append(sum(sorted(distances)[:num_selected]))
        print(f"Client {i + 1} Distances: {distances}")
        print(f"Client {i + 1} Score: {scores[-1]}")

    # Select the best vectors
    selected_indices = np.argsort(scores)[:num_selected]
    selected_updates = np.array([updates[i] for i in selected_indices])
    print("\nSelected Indices:", selected_indices)
    print("Selected Updates:")
    for idx in selected_indices:
        print(f"Client {idx + 1}: {updates[idx]}")

    return np.mean(selected_updates, axis=0)

# Save model after Multi-Krum aggregation
def save_model(parameters, round_num, save_dir='aggregated_models'):
    """Save the aggregated model parameters to a file."""
    os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, f'aggregated_model_round_{round_num}.pkl')
    joblib.dump(parameters, file_path)
    print(f"\n✅ Model saved after Round {round_num}: {file_path}")

# Reputation-based weighting
def reputation_weighting(updates, reputation_scores):
    """Apply reputation-based weighting with display."""
    print("\n--- Reputation-Based Weighting ---")

    weighted_updates = [
        update * rep for update, rep in zip(updates, reputation_scores)
    ]

    for i, (update, weight) in enumerate(zip(updates, reputation_scores)):
        print(f"Client {i + 1} Weighted Parameters: {update * weight}")

    aggregated = np.mean(weighted_updates, axis=0)
    print("\nWeighted Aggregated Parameters:", aggregated)
    return aggregated

# Update reputation scores
def update_reputation(updates, aggregated_update, reputation_scores, threshold=0.9, decay=0.95, boost=1.05):
    """Update reputation scores based on cosine similarity with display."""
    print("\n--- Reputation Update ---")

    similarities = [cosine_similarity([update], [aggregated_update])[0][0] for update in updates]

    for i, sim in enumerate(similarities):
        if sim < threshold:
            reputation_scores[i] *= decay  # Reduce score for deviation
            print(f"Client {i + 1}: Similarity = {sim:.4f} (Reduced Reputation)")
        else:
            reputation_scores[i] *= boost  # Increase score for valid contribution
            print(f"Client {i + 1}: Similarity = {sim:.4f} (Increased Reputation)")

    # Keep scores in range [0.01, 1.0]
    reputation_scores = np.clip(reputation_scores, 0.01, 1.0)
    print("Updated Reputation Scores:", reputation_scores)

    return reputation_scores

# Perform aggregation over multiple rounds
num_rounds = 5

for round in range(1, num_rounds + 1):
    print(f"\n=== Round {round} ===")

    # Step 1: Multi-Krum aggregation
    aggregated_update = multi_krum(parameter_vectors)

    # Save the model after Multi-Krum aggregation
    save_model(aggregated_update, round)

    # Step 2: Reputation-weighted aggregation
    final_update = reputation_weighting(parameter_vectors, reputation_scores)

    # Step 3: Update reputation scores
    reputation_scores = update_reputation(parameter_vectors, aggregated_update, reputation_scores)

    print(f"\nFinal Aggregated Parameters (Round {round}):")
    print(final_update)

print("\n=== Final Result After All Rounds ===")
print("Final Reputation Scores:", reputation_scores)
print("Final Aggregated Parameters:", final_update)


Error loading lightgbm_model.pkl: No module named 'numpy._core'


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



--- Initial Parameter Vectors ---
Client 1 Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.   0.   0.   0.]
Client 2 Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.  42.   0.   0.]

=== Round 1 ===

--- Multi-Krum Aggregation ---
Client 1 Distances: [42.0]
Client 1 Score: 42.0
Client 2 Distances: [42.0]
Client 2 Score: 42.0

Selected Indices: [0 1]
Selected Updates:
Client 1: [  1.   0.   0.   1.   2.   0. 100.   0.   0.   0.   0.]
Client 2: [  1.   0.   0.   1.   2.   0. 100.   0.  42.   0.   0.]

✅ Model saved after Round 1: aggregated_models/aggregated_model_round_1.pkl

--- Reputation-Based Weighting ---
Client 1 Weighted Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.   0.   0.   0.]
Client 2 Weighted Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.  42.   0.   0.]

Weighted Aggregated Parameters: [  1.   0.   0.   1.   2.   0. 100.   0.  21.   0.   0.]

--- Reputation Update ---
Client 1: Similarity = 0.9787 (Increased Reputation)
Client 2: Similarity = 0.9819 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset
file_path = 'Friday_Afternoon_DDos_CICIDS17.csv'
df = pd.read_csv(file_path)

# === Clean Column Names ===
df.columns = df.columns.str.strip()

# === Drop Irrelevant Columns ===
columns_to_drop = ['Fwd PSH Flags', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags',
                   'CWE Flag Count', 'ECE Flag Count']

df = df.drop(columns=columns_to_drop, errors='ignore')

# === Convert all columns to float (handles mixed types)
for col in df.columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# === Handle Infinity and Large Values ===
# Replace inf with NaN
df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Clip extremely large values (based on domain knowledge)
# Adjust the threshold as needed
threshold = 1e9  # Example threshold
df = df.clip(-threshold, threshold)

# === Handle Missing Values ===
numeric_cols = df.select_dtypes(include='number').columns
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

# === Convert Labels to Binary ===
df['Label'] = df['Label'].apply(lambda x: 0 if x == 'BENIGN' else 1)

# === Separate Features and Labels ===
X = df.drop('Label', axis=1)
y = df['Label']

# === Standardize the Features ===
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# === Split into Training and Testing Sets ===
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

print("Preprocessing complete.")
print(f"Training set: {X_train.shape}, Testing set: {X_test.shape}")


Preprocessing complete.
Training set: (158021, 72), Testing set: (67724, 72)


In [19]:
import joblib
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load model parameters with compatibility check
def load_parameters(file_path):
    """Load model parameters and extract only numerical parameters."""
    try:
        model = joblib.load(file_path)

        # Extract coefficients or parameters
        if hasattr(model, 'coef_'):
            params = model.coef_.flatten()
        elif hasattr(model, 'get_params'):
            params = np.array(list(model.get_params().values())).flatten()
        else:
            raise ValueError(f"Unsupported model format: {file_path}")

        # Filter only numerical parameters
        params = np.array([p for p in params if isinstance(p, (int, float, np.number))])

        if len(params) == 0:
            print(f"⚠️ No numerical parameters found in {file_path}. Skipping.")
            return None

        return params

    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None

# Paths to pkl files
file_paths = [
    'random_forest_model.pkl',
    'lightgbm_model.pkl',
    'model.pkl'
]

# Load all parameter vectors
parameter_vectors = [load_parameters(fp) for fp in file_paths]
parameter_vectors = [vec for vec in parameter_vectors if vec is not None]

# Ensure consistent dimensions by padding or truncating
if parameter_vectors:
    max_length = max(len(vec) for vec in parameter_vectors)

    def pad_or_truncate(vec, length):
        """Pad or truncate vectors to the same length."""
        if len(vec) < length:
            return np.pad(vec, (0, length - len(vec)), 'constant')
        else:
            return vec[:length]

    parameter_vectors = [pad_or_truncate(vec, max_length) for vec in parameter_vectors]

# Check if all models failed
if not parameter_vectors:
    print("❌ No valid models loaded. Check compatibility and formats.")
    exit()

# Initialize reputation scores
reputation_scores = np.ones(len(parameter_vectors))

# Multi-Krum aggregation
def multi_krum(updates, num_selected=2):
    """Perform Multi-Krum aggregation with display."""
    num_clients = len(updates)
    scores = []

    for i in range(num_clients):
        distances = [np.linalg.norm(updates[i] - updates[j])
                     for j in range(num_clients) if i != j]
        scores.append(sum(sorted(distances)[:num_selected]))

    selected_indices = np.argsort(scores)[:num_selected]
    selected_updates = np.array([updates[i] for i in selected_indices])

    return np.mean(selected_updates, axis=0)

# Reputation-based weighting
def reputation_weighting(updates, reputation_scores):
    """Apply reputation-based weighting."""
    weighted_updates = [update * rep for update, rep in zip(updates, reputation_scores)]
    aggregated = np.mean(weighted_updates, axis=0)
    return aggregated

# Update reputation scores
def update_reputation(updates, aggregated_update, reputation_scores, threshold=0.9, decay=0.95, boost=1.05):
    """Update reputation scores based on cosine similarity."""
    similarities = [cosine_similarity([update], [aggregated_update])[0][0] for update in updates]

    for i, sim in enumerate(similarities):
        if sim < threshold:
            reputation_scores[i] *= decay
        else:
            reputation_scores[i] *= boost

    reputation_scores = np.clip(reputation_scores, 0.01, 1.0)
    return reputation_scores

# Save aggregated model as pkl
def save_aggregated_model(final_update, file_name='aggregated_model.pkl'):
    """Save the aggregated parameters into a .pkl file."""
    try:
        joblib.dump(final_update, file_name)
        print(f"\n✅ Aggregated model saved as '{file_name}'")
    except Exception as e:
        print(f"❌ Error saving model: {e}")

# Perform aggregation over multiple rounds
num_rounds = 5

for round in range(num_rounds):
    print(f"\n=== Round {round + 1} ===")

    # Step 1: Multi-Krum aggregation
    aggregated_update = multi_krum(parameter_vectors)

    # Step 2: Reputation-weighted aggregation
    final_update = reputation_weighting(parameter_vectors, reputation_scores)

    # Step 3: Update reputation scores
    reputation_scores = update_reputation(parameter_vectors, aggregated_update, reputation_scores)

# Save the final aggregated model
save_aggregated_model(final_update)

print("\n=== Final Result After All Rounds ===")
print("Final Reputation Scores:", reputation_scores)
print("Final Aggregated Parameters saved in 'aggregated_model.pkl'")


Error loading lightgbm_model.pkl: No module named 'numpy._core'

=== Round 1 ===

=== Round 2 ===

=== Round 3 ===

=== Round 4 ===

=== Round 5 ===

✅ Aggregated model saved as 'aggregated_model.pkl'

=== Final Result After All Rounds ===
Final Reputation Scores: [1. 1.]
Final Aggregated Parameters saved in 'aggregated_model.pkl'


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [22]:
import joblib

# Load the model
model = joblib.load('aggregated_model.pkl')

# Check the type
print(type(model))


<class 'numpy.ndarray'>


In [25]:
import joblib
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# === Load the Individual RandomForest Models ===
rf1 = joblib.load('model.pkl')
rf2 = joblib.load('random_forest_model.pkl')

# === Combine the Models into an Aggregated Model ===
aggregated_model = VotingClassifier(
    estimators=[('rf1', rf1), ('rf2', rf2)],
    voting='soft'
)

# === Train the Aggregated Model ===
aggregated_model.fit(X_train, y_train)

# === Make Predictions ===
y_pred = aggregated_model.predict(X_test)

# === Evaluate the Model ===
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# === Display Results ===
print("\n=== Model Evaluation Results ===")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(report)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



=== Model Evaluation Results ===
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000

Confusion Matrix:
[[67724]]

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00     67724

    accuracy                           1.00     67724
   macro avg       1.00      1.00      1.00     67724
weighted avg       1.00      1.00      1.00     67724

