In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Load the example data: only the Iris feature matrix is used below
from sklearn.datasets import load_iris
data = load_iris()
X = data["data"]

# Hold out 30% of the rows *before* any preprocessing so the test rows
# never influence what is learned from the training rows
X_train, X_test = train_test_split(X, random_state=42, test_size=0.3)

# Learn the standardization parameters from the training rows only
scaler = StandardScaler().fit(X_train)

# Apply that single fitted scaler to both partitions (no refit on test data)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the Isolation Forest (contamination set to 0.1) and fit it on the
# scaled training data in one step; fit() returns the fitted estimator
iso_forest = IsolationForest(random_state=42, contamination=0.1).fit(X_train_scaled)

# Predict on both the training and the test partition.
# predict() labels inliers as 1 and outliers as -1; recode that so that
# 0 means normal and 1 means anomaly.
y_pred_train = (iso_forest.predict(X_train_scaled) != 1).astype(int)
y_pred_test = (iso_forest.predict(X_test_scaled) != 1).astype(int)

# Evaluate the model.
# NOTE: scoring the predictions against themselves always yields a perfect
# classification report and says nothing about the model. A report is
# therefore produced only when true labels are supplied; otherwise the number
# of flagged samples is shown instead.
def _report_anomalies(title, y_pred, y_true=None):
    """Print an evaluation of binary anomaly predictions.

    Args:
        title: Heading printed before the results.
        y_pred: Array of predictions encoded as 0 (normal) / 1 (anomaly).
        y_true: Optional array of true labels in the same encoding. When
            given, a classification report of y_true vs. y_pred is printed;
            when None, only the count and share of flagged samples is printed.
    """
    print(title)
    if y_true is not None:
        print(classification_report(y_true, y_pred))
        return
    flagged = int(np.count_nonzero(y_pred))
    total = int(np.size(y_pred))
    # Guard the ratio so an empty prediction array does not divide by zero
    share = flagged / total if total else 0.0
    print(f"{flagged} of {total} samples flagged as anomalies ({share:.1%}); "
          "no ground-truth labels available for a classification report")


# Set these to arrays of true labels (0 = normal, 1 = anomaly) when available
y_true_train = None
y_true_test = None

_report_anomalies("Training Set Anomaly Detection Results:", y_pred_train, y_true_train)
_report_anomalies("Test Set Anomaly Detection Results:", y_pred_test, y_true_test)


ModuleNotFoundError: No module named 'numpy'