<a href="https://colab.research.google.com/github/engmariamahmed04/NTI-ML-tasks/blob/main/annomly_detection_task.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler


In [None]:
# Locate the dataset instead of relying on a single hard-coded Colab path.
# Candidates are checked in order: the Colab upload directory (the location
# this notebook originally used), then the current working directory so the
# notebook also runs outside Colab.
DATA_FILENAME = 'smart_system_anomaly_dataset.csv'
DATA_CANDIDATES = [Path('/content') / DATA_FILENAME, Path(DATA_FILENAME)]

data_path = next((path for path in DATA_CANDIDATES if path.is_file()), None)
if data_path is None:
    searched = ', '.join(str(path) for path in DATA_CANDIDATES)
    # Same exception type as pd.read_csv would raise, but with an actionable message.
    raise FileNotFoundError(
        f"Dataset '{DATA_FILENAME}' not found. Upload it to the runtime or place it "
        f"next to the notebook. Searched: {searched}"
    )

df = pd.read_csv(data_path)


FileNotFoundError: [Errno 2] No such file or directory: '/content/smart_system_anomaly_dataset.csv'

In [None]:
# Names of the dataframe columns used as model inputs, in the order the
# scaler and the detectors will see them.
features = [
    'cpu_usage',
    'memory_usage',
    'network_in_kb',
    'network_out_kb',
    'packet_rate',
    'avg_response_time_ms',
    'service_access_count',
    'failed_auth_attempts',
    'is_encrypted',
    'geo_location_variation',
]

# Feature matrix: every row of df, restricted to the columns listed above.
X = df.loc[:, features]

In [None]:
# Learn the per-feature scaling statistics from X, then apply them to X.
# `scaler` is kept so new inputs can be transformed the same way later.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Ground truth as 0/1: a row counts as an anomaly (1) when its 'label'
# text contains the substring 'Anomaly', otherwise it is normal (0).
true_labels_binary = df['label'].map(lambda label_text: int('Anomaly' in label_text))


In [None]:
# Isolation Forest detector; random_state is fixed so reruns give the same trees.
model = IsolationForest(
    contamination=0.05,
    n_estimators=100,
    random_state=42,
)
model.fit(X_scaled)

# Store the per-row verdict on the dataframe: -1 = anomaly, 1 = normal.
df['anomaly'] = model.predict(X_scaled)

In [None]:
from sklearn.metrics import classification_report

print("=== Isolation Forest ===")

# Re-encode the detector output (-1 = anomaly, 1 = normal) to match the
# ground-truth encoding (1 = anomaly, 0 = normal).
predicted_labels_binary = (df['anomaly'] == -1).astype(int)

isolation_forest_report = classification_report(
    true_labels_binary,
    predicted_labels_binary,
    labels=[0, 1],
    target_names=["Normal", "Anomaly"],
)
print(isolation_forest_report)

In [None]:
# Local Outlier Factor as a second, density-based detector on the same scaled features.
# contamination is set to the same 0.05 used for the Isolation Forest so the two
# detectors flag a comparable share of rows; the previous 0.0017 was annotated as a
# "fraud ratio", which does not describe this dataset.
# NOTE(review): 0.05 is an assumed anomaly share — confirm against the label distribution.
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.05)
y_pred = lof.fit_predict(X_scaled)  # -1 = anomaly, 1 = normal

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import numpy as np

# Map ground truth labels to binary: 1 = Anomaly, 0 = Normal
true_labels_binary = df['label'].apply(lambda x: 1 if 'Anomaly' in x else 0)

# Use Isolation Forest predicted labels (0 for normal, 1 for anomaly);
# df['anomaly'] stores -1 for anomalies, so -1 maps to 1 and everything else to 0.
predicted_labels = np.where(df['anomaly'] == -1, 1, 0)

# Classification report. labels=[0, 1] pins both classes so the two
# target_names always line up, even if one class is absent from the data
# or from the predictions.
report = classification_report(
    true_labels_binary,
    predicted_labels,
    labels=[0, 1],
    target_names=["Normal", "Anomaly"],
)
print("Classification Report:\n", report)

In [None]:

# ROC curve for the Local Outlier Factor detector.
# roc_curve needs the ground truth (1 = anomaly) and a continuous score that
# increases with "anomaly-ness". LOF exposes negative_outlier_factor_, which is
# lower (more negative) for outliers, so it is negated here.
lof_scores = -lof.negative_outlier_factor_

fpr, tpr, _ = roc_curve(true_labels_binary, lof_scores)
roc_auc = roc_auc_score(true_labels_binary, lof_scores)

fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(fpr, tpr, label=f'LOF (AUC = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], 'k--', label='Random')  # chance-level diagonal for reference
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - Local Outlier Factor')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import gradio as gr
import numpy as np


def predict_anomaly(cpu_usage, memory_usage, network_in_kb, network_out_kb,
                    packet_rate, avg_response_time_ms, service_access_count,
                    failed_auth_attempts, is_encrypted, geo_location_variation):
    """Classify one set of readings with the fitted scaler and Isolation Forest.

    Each parameter corresponds to the feature column of the same name. The
    values are scaled with the notebook-level ``scaler`` and passed to the
    notebook-level ``model``.

    Returns:
        "🔴 Anomaly Detected" when the model predicts -1, otherwise
        "🟢 Normal Behavior".

    Raises:
        ValueError: if any input is None (for example, a UI control that was
            left unselected).
    """
    row = {
        'cpu_usage': cpu_usage,
        'memory_usage': memory_usage,
        'network_in_kb': network_in_kb,
        'network_out_kb': network_out_kb,
        'packet_rate': packet_rate,
        'avg_response_time_ms': avg_response_time_ms,
        'service_access_count': service_access_count,
        'failed_auth_attempts': failed_auth_attempts,
        'is_encrypted': is_encrypted,
        'geo_location_variation': geo_location_variation,
    }

    # Fail early with a readable message instead of an error from deep inside sklearn.
    missing = [name for name, value in row.items() if value is None]
    if missing:
        raise ValueError(f"Missing input value(s): {', '.join(missing)}")

    # The scaler was fitted on a DataFrame, so hand it a DataFrame with the same
    # column names, ordered by `features` (the list used at fit time).
    input_frame = pd.DataFrame([row], columns=features)

    input_scaled = scaler.transform(input_frame)
    prediction = model.predict(input_scaled)

    if prediction[0] == -1:
        return "🔴 Anomaly Detected"
    return "🟢 Normal Behavior"


# UI controls, listed in the same order as predict_anomaly's parameters
# (Gradio passes the component values to the function positionally).
inputs = [
    gr.Slider(minimum=0, maximum=100, label="CPU Usage"),
    gr.Slider(minimum=0, maximum=100, label="Memory Usage"),
    gr.Slider(minimum=0, maximum=5000, label="Network In (KB)"),
    gr.Slider(minimum=0, maximum=5000, label="Network Out (KB)"),
    gr.Slider(minimum=0, maximum=1000, label="Packet Rate"),
    gr.Slider(minimum=0, maximum=2000, label="Avg Response Time (ms)"),
    gr.Slider(minimum=0, maximum=10000, label="Service Access Count"),
    gr.Slider(minimum=0, maximum=100, label="Failed Auth Attempts"),
    gr.Radio(choices=[0, 1], label="Is Encrypted (0=No, 1=Yes)"),
    gr.Slider(minimum=0, maximum=100, label="Geo Location Variation"),
]

# Wire the prediction function to the controls above and show its text result.
demo = gr.Interface(
    fn=predict_anomaly,
    inputs=inputs,
    outputs="text",
    title="Anomaly Detection System",
    description="Use the sliders to simulate system behavior and detect if it's anomalous.",
)

# share=True asks Gradio for a public share link in addition to the local one.
demo.launch(share=True)