In [None]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

# Step 2: Load Dataset
# Point csv_path at your actual dataset path or filename
# (the default is 'network_traffic.csv' in the working directory).
csv_path = 'network_traffic.csv'
df = pd.read_csv(csv_path)

# Preview the first rows to get a feel for the columns and values
preview = df.head()
print(preview)

# Step 3: Feature Extraction / Selection
# Use every numeric column as a model input. 'number' matches all numeric
# dtypes rather than only the 64-bit ones, so narrower columns such as int32
# or float32 are not silently left out.
# (Adjust based on actual dataset columns)
# A pre-existing 'Anomaly' column (e.g. from a previously labelled export) is
# excluded so old labels are never fed back into the model as a feature.
features = (
    df.select_dtypes(include='number')
    .drop(columns=['Anomaly'], errors='ignore')
)

# Fail early with a clear message instead of an opaque estimator error when
# the dataset has no rows or no numeric columns.
if features.empty:
    raise ValueError("No numeric feature data available for anomaly detection.")

# NOTE(review): missing or infinite values are not handled here -- confirm the
# dataset is clean, or impute/drop them before fitting.

# Step 4: Train Isolation Forest
# contamination=0.05 is the expected share of outliers in the data;
# random_state=42 fixes the seed so repeated runs give the same labels.
iso_forest = IsolationForest(contamination=0.05, random_state=42)

# fit_predict labels each row: 1 for inliers, -1 for outliers.
df['Anomaly'] = iso_forest.fit_predict(features)

# Step 5: Display anomalies
# Rows labelled -1 in the 'Anomaly' column are the ones flagged as outliers.
is_outlier = df['Anomaly'].eq(-1)
anomalies = df.loc[is_outlier]

# Report how many rows were flagged, then show them in full
print(f"Number of anomalies detected: {len(anomalies)}")
print(anomalies)

# Step 6: Visualize anomalies (example: plot first two features)
# The scatter plot needs two feature columns; with fewer than two, indexing
# the second column would raise an IndexError, so skip the plot instead.
if features.shape[1] < 2:
    print("Skipping scatter plot: at least two numeric features are required.")
else:
    plt.figure(figsize=(10, 6))
    # Colour each point by its label: green for inliers (1), red for outliers (-1)
    colors = df['Anomaly'].map({1: 'green', -1: 'red'})
    plt.scatter(features.iloc[:, 0], features.iloc[:, 1], c=colors, alpha=0.6)
    plt.xlabel(features.columns[0])
    plt.ylabel(features.columns[1])
    plt.title('Isolation Forest Anomaly Detection on Network Traffic')
    plt.show()

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): this cell appears after the cell that reads
# 'network_traffic.csv' through a relative path. If the dataset is meant to
# come from Drive, this presumably has to run first and the CSV path must
# point somewhere under /content/drive -- confirm.
from google.colab import drive
drive.mount('/content/drive')