# Project 34: Detecting Noisy Neighbors in a Multi-tenant Cloud Environment

**Objective:** Build an unsupervised anomaly detection model that can identify a "noisy neighbor" (a high-resource-consuming tenant) on a shared host by analyzing the network traffic patterns of all tenants and flagging outliers.

**Dataset Source:** Synthetically Generated (simulated multi-tenant network traffic with periodic noisy neighbor behavior)

**Model:** Isolation Forest for unsupervised outlier detection

**Instructions:**
This notebook is fully self-contained and does not require external files. Simply run all cells in sequence.

In [None]:
# ==================================================================================
#  Project 34: Noisy Neighbors Detection - Setup and Imports
# ==================================================================================

import pandas as pd
import numpy as np
import random
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
import time
from datetime import datetime, timedelta

# Seed the stdlib and NumPy global generators with the same value so that
# repeated runs of the notebook draw identical random sequences.
for seed_rng in (random.seed, np.random.seed):
    seed_rng(42)

print("All libraries imported successfully.")

In [None]:
# ==================================================================================
#  Synthetic Tenant Traffic Data Generation
# ==================================================================================

print("--- Generating Synthetic Multi-Tenant Network Traffic Dataset ---")

# Simulation parameters
num_tenants = 20
time_steps = 1000  # Simulate 1000 time intervals (e.g., minutes)
data = []
tenants = [f'tenant_{i+1}' for i in range(num_tenants)]
noisy_neighbor_tenant = 'tenant_5'
secondary_noisy_tenant = 'tenant_15'  # Add a second potential noisy neighbor

# Baseline traffic profiles are selected by *exact* tenant id. A substring
# test such as `'tenant_1' in tenant` also matches tenant_10..tenant_19 (and
# `'tenant_2'` matches tenant_20), which would put those tenants in the
# low-activity profile and make the high-activity branch unreachable.
low_activity_tenants = {'tenant_1', 'tenant_2'}
high_activity_tenants = {'tenant_19', 'tenant_20'}

print(f"Simulation parameters:")
print(f"• Number of tenants: {num_tenants}")
print(f"• Time steps: {time_steps}")
print(f"• Primary noisy neighbor: {noisy_neighbor_tenant}")
print(f"• Secondary noisy neighbor: {secondary_noisy_tenant}")

# One row is produced per (time step, tenant) pair. Every profile branch
# draws exactly two normal variates, so the random stream stays aligned
# across tenants regardless of which branch is taken.
for t in range(time_steps):
    for tenant in tenants:
        is_noisy = False

        # Define normal behavior with some variation based on tenant
        if tenant in low_activity_tenants:  # Low activity tenants
            base_pps = max(0, np.random.normal(2000, 500))  # Lower baseline
            base_bps = base_pps * np.random.normal(250, 30)
        elif tenant in high_activity_tenants:  # High activity tenants
            base_pps = max(0, np.random.normal(8000, 1200))  # Higher baseline
            base_bps = base_pps * np.random.normal(400, 60)
        else:  # Normal tenants
            base_pps = max(0, np.random.normal(5000, 1000))  # Packets per second
            base_bps = base_pps * np.random.normal(300, 50)  # Bytes per second

        # Shared sinusoidal load cycle: +/-30% swing with a period of
        # 100 time steps, applied identically to every tenant.
        time_factor = 1 + 0.3 * np.sin(2 * np.pi * t / 100)
        base_pps *= time_factor
        base_bps *= time_factor

        # --- Simulate the Primary Noisy Neighbor event ---
        # The noisy neighbor has a burst of high activity for a specific period
        if tenant == noisy_neighbor_tenant and 400 <= t < 600:
            base_pps *= np.random.uniform(5, 10)  # 5-10x more packets
            base_bps *= np.random.uniform(5, 10)  # 5-10x more bytes
            is_noisy = True

        # --- Simulate a Secondary Noisy Neighbor event ---
        # Shorter, more intense burst from another tenant
        if tenant == secondary_noisy_tenant and 750 <= t < 800:
            base_pps *= np.random.uniform(8, 15)  # Even higher spike
            base_bps *= np.random.uniform(8, 15)
            is_noisy = True

        # Rare unlabeled spikes (0.2% chance per row) on rows that are not
        # part of a noisy burst; they stay labeled "normal" so they act as
        # distractors for the detector.
        if not is_noisy and np.random.random() < 0.002:
            base_pps *= np.random.uniform(2, 4)
            base_bps *= np.random.uniform(2, 4)

        # Calculate additional derived metrics
        avg_packet_size = base_bps / max(base_pps, 1)  # Avoid division by zero
        # bytes/s divided by 1e6 (i.e. MB/s), capped at 100.
        # NOTE(review): this is a utilization proxy, not a true percentage of
        # any link capacity; the arithmetic is kept unchanged so the feature
        # values stay the same.
        network_utilization = min(base_bps / 1000000, 100)

        data.append([t, tenant, base_pps, base_bps, avg_packet_size,
                    network_utilization, is_noisy])

df = pd.DataFrame(data, columns=['timestamp', 'tenant_id', 'packets_per_second',
                                'bytes_per_second', 'avg_packet_size',
                                'network_utilization', 'is_truly_noisy'])

# Ensure no negative values
df['packets_per_second'] = df['packets_per_second'].clip(lower=0)
df['bytes_per_second'] = df['bytes_per_second'].clip(lower=0)

print(f"\nDataset generation complete. Created {len(df)} records.")
print(f"Total noisy neighbor events: {df['is_truly_noisy'].sum()}")
print(f"Percentage of noisy events: {(df['is_truly_noisy'].sum() / len(df)) * 100:.2f}%")

print("\nDataset Sample:")
print(df.sample(10).round(2))

print("\nDataset Statistics:")
print(df.describe().round(2))

In [None]:
# ==================================================================================
#  Data Exploration and Visualization
# ==================================================================================

print("--- Data Exploration and Pattern Analysis ---")

# 2x2 dashboard: time series, distribution, packets-vs-bytes scatter, and a
# per-tenant heatmap of mean packet rate.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(18, 12))
fig.suptitle('Multi-Tenant Network Traffic Analysis', fontsize=16)

# 1. Time series of packets per second for all tenants
pivot_data = df.pivot(index='timestamp', columns='tenant_id', values='packets_per_second')
ax1.plot(pivot_data.index, pivot_data[noisy_neighbor_tenant],
         color='red', linewidth=2, label=f'{noisy_neighbor_tenant} (Primary Noisy)')
ax1.plot(pivot_data.index, pivot_data[secondary_noisy_tenant],
         color='orange', linewidth=2, label=f'{secondary_noisy_tenant} (Secondary Noisy)')

# Plot a few normal tenants (labeled so they show up in the legend)
normal_tenants = ['tenant_1', 'tenant_3', 'tenant_10']
for tenant in normal_tenants:
    ax1.plot(pivot_data.index, pivot_data[tenant], alpha=0.7, linewidth=1, label=tenant)

ax1.set_title('Packets per Second Over Time')
ax1.set_xlabel('Time Step')
ax1.set_ylabel('Packets per Second')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Distribution of network metrics
noisy_row_mask = df['is_truly_noisy'].astype(bool)
normal_data = df[~noisy_row_mask]
noisy_data = df[noisy_row_mask]

ax2.hist(normal_data['packets_per_second'], bins=50, alpha=0.7,
         label='Normal', color='blue', density=True)
ax2.hist(noisy_data['packets_per_second'], bins=30, alpha=0.7,
         label='Noisy Neighbor', color='red', density=True)
ax2.set_title('Distribution of Packets per Second')
ax2.set_xlabel('Packets per Second')
ax2.set_ylabel('Density')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Scatter plot of packets vs bytes
ax3.scatter(normal_data['packets_per_second'], normal_data['bytes_per_second'],
           alpha=0.6, label='Normal', s=20, color='blue')
ax3.scatter(noisy_data['packets_per_second'], noisy_data['bytes_per_second'],
           alpha=0.8, label='Noisy Neighbor', s=30, color='red')
ax3.set_title('Packets vs Bytes per Second')
ax3.set_xlabel('Packets per Second')
ax3.set_ylabel('Bytes per Second')
ax3.legend()
ax3.grid(True, alpha=0.3)

# 4. Tenant activity heatmap
tenant_stats = df.groupby('tenant_id').agg({
    'packets_per_second': ['mean', 'max', 'std'],
    'is_truly_noisy': 'sum'
}).round(2)

tenant_stats.columns = ['_'.join(col).strip() for col in tenant_stats.columns]

# groupby() sorts tenant ids lexicographically (tenant_1, tenant_10, ...,
# tenant_2, ...). Restore the order in which tenants first appear in df so
# the grid reads row by row in that order.
tenant_order = list(df['tenant_id'].unique())
grid_cols = 5
grid_rows = -(-len(tenant_order) // grid_cols)  # ceiling division

# Pad with NaN so a tenant count that is not a multiple of grid_cols still
# reshapes cleanly.
mean_pps = tenant_stats['packets_per_second_mean'].reindex(tenant_order).to_numpy(dtype=float)
padded_means = np.full(grid_rows * grid_cols, np.nan)
padded_means[:mean_pps.size] = mean_pps
tenant_means = padded_means.reshape(grid_rows, grid_cols)

im = ax4.imshow(tenant_means, cmap='YlOrRd', aspect='auto')
ax4.set_title(f'Average Packets per Second by Tenant\n(Arranged in {grid_rows}x{grid_cols} Grid)')
ax4.set_xticks(range(grid_cols))
ax4.set_yticks(range(grid_rows))
ax4.set_xlabel('Tenant Column')
ax4.set_ylabel('Tenant Row')

# Label each cell with the tenant it represents
for cell_idx, cell_tenant in enumerate(tenant_order):
    cell_row, cell_col = divmod(cell_idx, grid_cols)
    ax4.text(cell_col, cell_row, cell_tenant, ha='center', va='center', fontsize=8)

# Add colorbar
plt.colorbar(im, ax=ax4, label='Avg Packets/sec')

plt.tight_layout()
plt.show()

# Display tenant behavior summary
print("\nTenant Behavior Summary:")
tenant_summary = df.groupby('tenant_id').agg({
    'packets_per_second': ['mean', 'max', 'std'],
    'bytes_per_second': ['mean', 'max'],
    'is_truly_noisy': 'sum'
}).round(2)

tenant_summary.columns = ['_'.join(col).strip() for col in tenant_summary.columns]
tenant_summary = tenant_summary.sort_values('is_truly_noisy_sum', ascending=False)
print(tenant_summary.head(10))

print(f"\nNoisy neighbor events by tenant:")
noisy_events = df[noisy_row_mask]['tenant_id'].value_counts()
print(noisy_events)

In [None]:
# ==================================================================================
#  Data Preparation and Feature Engineering
# ==================================================================================

print("--- Preparing Data for Unsupervised Learning ---")

# Columns fed to the anomaly detector
feature_cols = ['packets_per_second', 'bytes_per_second', 'avg_packet_size', 'network_utilization']
X = df[feature_cols].copy()

print(f"Original feature matrix shape: {X.shape}")
print(f"Features used for anomaly detection: {feature_cols}")

# Turn +/-inf into NaN, then impute every NaN with its column median
X = X.replace([np.inf, -np.inf], np.nan)
column_medians = X.median()
X = X.fillna(column_medians)

print(f"\nFeature statistics before scaling:")
print(X.describe().round(2))

# Standardize each feature (fit the scaler, then apply it to the same data)
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

print(f"\nFeatures scaled using StandardScaler")
print(f"Scaled feature matrix shape: {X_scaled.shape}")

# Report the per-feature mean and scale learned by the scaler
print(f"\nScaling parameters:")
for feature, fitted_mean, fitted_scale in zip(feature_cols, scaler.mean_, scaler.scale_):
    print(f"  {feature}: mean={fitted_mean:.2f}, std={fitted_scale:.2f}")

# Ground-truth labels are kept for evaluation only; the model never sees them
y_true = df['is_truly_noisy'].to_numpy()
print(f"\nGround truth labels available for evaluation: {len(y_true)} samples")
print(f"True noisy neighbor samples: {y_true.sum()} ({y_true.mean()*100:.2f}%)")

In [None]:
# ==================================================================================
#  Model Training (Unsupervised)
# ==================================================================================

print("--- Model Training ---")

# Share of rows labeled noisy in the ground truth (reported for reference)
expected_contamination = np.asarray(y_true).mean()
print(f"Expected contamination based on ground truth: {expected_contamination:.4f} ({expected_contamination*100:.2f}%)")

# `contamination` tells Isolation Forest what fraction of rows to treat as
# outliers. The simulation injects 200 primary + 50 secondary noisy rows out
# of 1000 time steps * 20 tenants = 20,000 rows, i.e. 250 / 20,000 = 1.25%.
# The configured rate is set slightly above that figure.
contamination_rate = 0.015

forest_config = {
    'n_estimators': 200,
    'contamination': contamination_rate,
    'random_state': 42,
    'n_jobs': -1,
}
model = IsolationForest(**forest_config)

print(f"\nIsolation Forest Configuration:")
print(f"• Number of estimators: {model.n_estimators}")
print(f"• Contamination rate: {model.contamination} ({model.contamination*100:.1f}%)")
print(f"• Random state: {model.random_state}")

print("\nTraining the Isolation Forest model on the entire dataset...")
start_time = time.time()
model.fit(X_scaled)
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds.")

# predict() returns 1 for inliers and -1 for outliers; remap to
# 0 = normal, 1 = anomaly so predictions line up with the ground truth.
y_pred_raw = model.predict(X_scaled)
y_pred = np.where(y_pred_raw == -1, 1, 0)

# Per-row scores from the fitted model
anomaly_scores = model.decision_function(X_scaled)

predicted_count = y_pred.sum()
actual_count = np.sum(y_true)
print(f"\nPrediction Summary:")
print(f"• Total samples: {len(y_pred)}")
print(f"• Predicted anomalies: {predicted_count} ({np.mean(y_pred)*100:.2f}%)")
print(f"• Actual anomalies: {actual_count} ({np.mean(y_true)*100:.2f}%)")
print(f"• Anomaly score range: [{anomaly_scores.min():.3f}, {anomaly_scores.max():.3f}]")

In [None]:
# ==================================================================================
#  Model Evaluation
# ==================================================================================

print("--- Model Evaluation ---")

# Precision / recall / F1 with the anomaly class (label 1) as the positive class
precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, pos_label=1, average='binary'
)

for metric_line in (
    f"\nBinary Classification Metrics:",
    f"• Precision: {precision:.3f} (of predicted anomalies, what % were actually anomalies)",
    f"• Recall: {recall:.3f} (of actual anomalies, what % were detected)",
    f"• F1-Score: {f1:.3f} (harmonic mean of precision and recall)",
):
    print(metric_line)

# Per-class report
print("\nDetailed Classification Report:")
print(classification_report(y_true, y_pred, target_names=['Normal', 'Noisy Neighbor']))

# Confusion matrix: rows are actual classes, columns are predicted classes
cm = confusion_matrix(y_true, y_pred)
(tn, fp), (fn, tp) = cm

print(f"\nConfusion Matrix:")
print(f"                 Predicted")
print(f"Actual    Normal  Anomaly")
print(f"Normal    {tn:6d}  {fp:7d}")
print(f"Anomaly   {fn:6d}  {tp:7d}")

# Metrics derived from the four confusion-matrix cells
accuracy = (tp + tn) / (tp + tn + fp + fn)
actual_negatives = tn + fp
if actual_negatives > 0:
    specificity = tn / actual_negatives
    false_positive_rate = fp / actual_negatives
else:
    specificity = 0
    false_positive_rate = 0

for metric_line in (
    f"\nAdditional Metrics:",
    f"• Accuracy: {accuracy:.3f}",
    f"• Specificity: {specificity:.3f} (true negative rate)",
    f"• False Positive Rate: {false_positive_rate:.3f}",
    f"• True Positives: {tp}",
    f"• False Positives: {fp}",
    f"• True Negatives: {tn}",
    f"• False Negatives: {fn}",
):
    print(metric_line)

In [None]:
# ==================================================================================
#  Results Visualization
# ==================================================================================

print("--- Results Visualization ---")

# 2x2 dashboard: confusion matrix, score distributions, the primary noisy
# tenant's time series with detections, and a 2-D PCA view of the features.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(18, 12))
fig.suptitle('Noisy Neighbor Detection Results', fontsize=16)

# 1. Confusion matrix heatmap
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Normal', 'Noisy Neighbor'],
            yticklabels=['Normal', 'Noisy Neighbor'], ax=ax1)
ax1.set_title('Confusion Matrix')
ax1.set_ylabel('Actual')
ax1.set_xlabel('Predicted')

# 2. Anomaly scores distribution
normal_scores = anomaly_scores[y_true == 0]
anomaly_scores_true = anomaly_scores[y_true == 1]

ax2.hist(normal_scores, bins=50, alpha=0.7, label='Normal', color='blue', density=True)
ax2.hist(anomaly_scores_true, bins=30, alpha=0.7, label='True Anomalies', color='red', density=True)
# `anomaly_scores` comes from decision_function(), which is
# score_samples() - offset_. On this axis the inlier/outlier boundary is
# therefore 0 (negative = outlier); model.offset_ is the boundary on the
# score_samples() scale and would put the line in the wrong place.
ax2.axvline(0.0, color='green', linestyle='--', linewidth=2, label='Decision Threshold')
ax2.set_title('Anomaly Scores Distribution')
ax2.set_xlabel('Anomaly Score')
ax2.set_ylabel('Density')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Time series with detected anomalies
df_viz = df.copy()
df_viz['predicted_anomaly'] = y_pred
df_viz['anomaly_score'] = anomaly_scores

# Focus on the noisy neighbor tenant
noisy_tenant_data = df_viz[df_viz['tenant_id'] == noisy_neighbor_tenant]
ax3.plot(noisy_tenant_data['timestamp'], noisy_tenant_data['packets_per_second'],
         color='blue', alpha=0.7, label='Traffic')

# Highlight true anomalies
true_anomalies = noisy_tenant_data[noisy_tenant_data['is_truly_noisy'] == 1]
ax3.scatter(true_anomalies['timestamp'], true_anomalies['packets_per_second'],
           color='red', s=50, label='True Anomalies', zorder=5)

# Highlight detected anomalies
detected_anomalies = noisy_tenant_data[noisy_tenant_data['predicted_anomaly'] == 1]
ax3.scatter(detected_anomalies['timestamp'], detected_anomalies['packets_per_second'],
           color='orange', s=30, marker='x', label='Detected Anomalies', zorder=5)

ax3.set_title(f'Anomaly Detection Results for {noisy_neighbor_tenant}')
ax3.set_xlabel('Time Step')
ax3.set_ylabel('Packets per Second')
ax3.legend()
ax3.grid(True, alpha=0.3)

# 4. Feature space visualization (PCA for 2D visualization)
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Masks for ground-truth classes and for the model's detections
normal_mask = y_true == 0
anomaly_mask = y_true == 1
detected_mask = y_pred == 1

ax4.scatter(X_pca[normal_mask, 0], X_pca[normal_mask, 1],
           alpha=0.6, s=20, color='blue', label='Normal')
ax4.scatter(X_pca[anomaly_mask, 0], X_pca[anomaly_mask, 1],
           alpha=0.8, s=60, color='red', label='True Anomalies')
# Hollow orange rings so detections can be seen on top of the filled points
ax4.scatter(X_pca[detected_mask, 0], X_pca[detected_mask, 1],
           alpha=0.6, s=40, facecolors='none', edgecolors='orange',
           linewidth=2, label='Detected Anomalies')

ax4.set_title('Feature Space Visualization (PCA)')
ax4.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)')
ax4.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)')
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print(f"\nPCA Analysis:")
print(f"• PC1 explains {pca.explained_variance_ratio_[0]:.1%} of variance")
print(f"• PC2 explains {pca.explained_variance_ratio_[1]:.1%} of variance")
print(f"• Total variance explained: {sum(pca.explained_variance_ratio_):.1%}")

In [None]:
# ==================================================================================
#  Tenant-wise Analysis
# ==================================================================================

print("--- Tenant-wise Anomaly Detection Analysis ---")

# Add predictions to the dataframe for analysis
df_analysis = df.copy()
df_analysis['predicted_anomaly'] = y_pred
df_analysis['anomaly_score'] = anomaly_scores

# Analyze detection performance by tenant
tenant_analysis = []
for tenant in tenants:
    tenant_data = df_analysis[df_analysis['tenant_id'] == tenant]

    total_samples = len(tenant_data)
    true_anomalies = tenant_data['is_truly_noisy'].sum()
    detected_anomalies = tenant_data['predicted_anomaly'].sum()

    # Calculate metrics for this tenant
    if true_anomalies > 0:
        true_positives = ((tenant_data['is_truly_noisy'] == 1) &
                         (tenant_data['predicted_anomaly'] == 1)).sum()
        tenant_recall = true_positives / true_anomalies
        tenant_precision = true_positives / detected_anomalies if detected_anomalies > 0 else 0
    else:
        # No true anomalies for this tenant: recall is undefined (0/0), so
        # report NaN rather than a misleading 0.
        tenant_recall = np.nan
        # Every detection here is a false positive, so precision is 0 when
        # anything was flagged and undefined (NaN) when nothing was flagged.
        tenant_precision = 0.0 if detected_anomalies > 0 else np.nan

    avg_anomaly_score = tenant_data['anomaly_score'].mean()
    min_anomaly_score = tenant_data['anomaly_score'].min()

    tenant_analysis.append({
        'tenant_id': tenant,
        'total_samples': total_samples,
        'true_anomalies': true_anomalies,
        'detected_anomalies': detected_anomalies,
        'recall': tenant_recall,
        'precision': tenant_precision,
        'avg_anomaly_score': avg_anomaly_score,
        'min_anomaly_score': min_anomaly_score
    })

tenant_df = pd.DataFrame(tenant_analysis)
tenant_df = tenant_df.sort_values('true_anomalies', ascending=False)

print("\nTenant Detection Performance:")
print(tenant_df.round(3))

# Focus on tenants with actual anomalies
noisy_tenants = tenant_df[tenant_df['true_anomalies'] > 0]
print(f"\nNoisy Tenant Detection Summary:")
for _, row in noisy_tenants.iterrows():
    print(f"• {row['tenant_id']}:")
    print(f"  - True anomalies: {row['true_anomalies']}")
    print(f"  - Detected anomalies: {row['detected_anomalies']}")
    print(f"  - Recall: {row['recall']:.3f}")
    print(f"  - Precision: {row['precision']:.3f}")
    print(f"  - Avg anomaly score: {row['avg_anomaly_score']:.3f}")

# Identify false positive cases
false_positive_tenants = tenant_df[(tenant_df['true_anomalies'] == 0) &
                                  (tenant_df['detected_anomalies'] > 0)]
if len(false_positive_tenants) > 0:
    print(f"\nFalse Positive Analysis:")
    print(f"Tenants with false positive detections: {len(false_positive_tenants)}")
    for _, row in false_positive_tenants.iterrows():
        print(f"• {row['tenant_id']}: {row['detected_anomalies']} false positives")

# Calculate overall detection statistics
total_true_anomalies = tenant_df['true_anomalies'].sum()
total_detected_anomalies = tenant_df['detected_anomalies'].sum()
# Per-tenant true positives are recall * true_anomalies; sum those products
# (multiplying the two column sums would overcount).
successful_detections = int(round(
    (noisy_tenants['recall'] * noisy_tenants['true_anomalies']).sum()
))
# A noisy tenant counts as "detected" only if at least one of its true
# anomalies was flagged (recall > 0).
noisy_tenants_detected = int((noisy_tenants['recall'] > 0).sum())

print(f"\nOverall Detection Statistics:")
print(f"• Total true anomalies across all tenants: {total_true_anomalies}")
print(f"• Total detected anomalies: {total_detected_anomalies}")
print(f"• Tenants with noisy behavior detected: {noisy_tenants_detected} of {len(noisy_tenants)}")
print(f"• Average recall for noisy tenants: {noisy_tenants['recall'].mean():.3f}")
print(f"• Average precision for noisy tenants: {noisy_tenants['precision'].mean():.3f}")

In [None]:
# ==================================================================================
#  Conclusion
# ==================================================================================

print("--- Conclusion ---")
print("The Isolation Forest model successfully learned to detect noisy neighbors in the multi-tenant cloud environment.")


def _recall_for(tenant_id):
    """Return the recall recorded in `noisy_tenants` for the given tenant id."""
    matching_recalls = noisy_tenants.loc[noisy_tenants['tenant_id'] == tenant_id, 'recall']
    return matching_recalls.iloc[0]


# Headline metrics computed by the evaluation and training cells
for summary_line in (
    "\nKey Performance Results:",
    f"• Overall accuracy: {accuracy:.3f} ({accuracy*100:.1f}%)",
    f"• Precision: {precision:.3f} (reliability of anomaly alerts)",
    f"• Recall: {recall:.3f} (coverage of actual noisy neighbors)",
    f"• F1-Score: {f1:.3f} (balanced performance metric)",
    f"• False positive rate: {false_positive_rate:.3f} ({false_positive_rate*100:.1f}%)",
    f"• Training time: {end_time - start_time:.2f} seconds",
):
    print(summary_line)

# Per-tenant detection rates for the two simulated noisy neighbors
print("\nDetection Effectiveness:")
print(f"• Primary noisy neighbor ({noisy_neighbor_tenant}): {_recall_for(noisy_neighbor_tenant):.1%} detection rate")
print(f"• Secondary noisy neighbor ({secondary_noisy_tenant}): {_recall_for(secondary_noisy_tenant):.1%} detection rate")
print(f"• Total noisy events detected: {tp} out of {tp + fn}")
print(f"• False alarms generated: {fp} out of {total_detected_anomalies} alerts")

# Fixed narrative sections, printed as a heading followed by its bullets
narrative_sections = {
    "\nBusiness Impact:": (
        "• **Performance Isolation**: Proactively identify tenants causing performance degradation",
        "• **Resource Management**: Enable targeted resource throttling or migration",
        "• **SLA Protection**: Prevent noisy neighbors from impacting other tenants' SLAs",
        "• **Cost Optimization**: Identify opportunities for workload balancing and right-sizing",
    ),
    "\nOperational Applications:": (
        "• **Real-time Monitoring**: Deploy for continuous tenant behavior analysis",
        "• **Automated Alerting**: Generate alerts when anomalous behavior is detected",
        "• **Capacity Planning**: Use insights for better resource allocation strategies",
        "• **Policy Enforcement**: Automatically trigger resource limits or migrations",
    ),
    "\nTechnical Insights:": (
        "• Isolation Forest effectively captures normal tenant behavior patterns",
        "• Network traffic features provide strong discriminative power",
        "• Unsupervised approach adapts to changing normal behavior over time",
        "• Low false positive rate makes the system practical for production use",
    ),
    "\nModel Strengths:": (
        "• No labeled training data required (unsupervised learning)",
        "• Scales well with number of tenants and time series length",
        "• Robust to normal variations in tenant behavior",
        "• Provides interpretable anomaly scores for ranking alerts",
    ),
    "\nDeployment Considerations:": (
        "• Integrate with cloud orchestration platforms (OpenStack, Kubernetes)",
        "• Set up real-time data pipelines for network metrics collection",
        "• Implement feedback loops to reduce false positives over time",
        "• Consider ensemble methods for improved robustness",
    ),
}
for heading, bullets in narrative_sections.items():
    print(heading)
    for bullet in bullets:
        print(bullet)

# Recommendations that depend on the measured recall and precision
print("\nRecommendations:")
recall_advice = (
    "• Excellent recall indicates strong noisy neighbor detection capability"
    if recall > 0.8
    else "• Consider tuning contamination parameter or adding more features to improve recall"
)
print(recall_advice)

precision_advice = (
    "• High precision indicates reliable alerts with minimal false positives"
    if precision > 0.7
    else "• Consider post-processing filters to reduce false positive rate"
)
print(precision_advice)

for closing_line in (
    "• Deploy gradually with human oversight to validate alerts initially",
    "• Continuously monitor model performance and retrain periodically",
    "• Extend to include additional metrics like CPU, memory, and disk I/O for comprehensive detection",
):
    print(closing_line)