# Random Forest Classifier Training
Train a Random Forest classifier to predict the `is_anomaly` flag produced by the anomaly-detection step.

## Step 1: Import Libraries
Import all necessary libraries for model training and evaluation.

In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib
import os

# Prerequisite: the `all_results` dictionary of processed device DataFrames must already exist from the anomaly-detection step
    

## Step 2: Prepare the Dataset
Merge the processed DataFrames of all devices into a single DataFrame.

In [None]:

# Stack every DataFrame held in `all_results` into one frame with a fresh
# 0..n-1 index, then discard each row that contains a missing value.
# NOTE(review): dropna() looks at every column, including the ones excluded
# from the feature set below -- confirm that is intended.
combined_df = pd.concat(all_results.values(), ignore_index=True).dropna()

# Columns that are never used as model inputs; every other column of
# `combined_df` becomes a feature, in its original column order.
# NOTE(review): assumes all remaining columns are numeric sensor readings --
# confirm against the anomaly-detection output.
EXCLUDE_FEATURES = ['timestamp', 'reconstruction_error', 'is_anomaly', 'label']
features = [
    column_name
    for column_name in combined_df.columns
    if column_name not in EXCLUDE_FEATURES
]

# Feature matrix and integer target (0 = normal, 1 = anomaly).
X = combined_df.loc[:, features]
y = combined_df.loc[:, 'is_anomaly'].astype(int)
    

## Step 3: Train-Test Split
Split into training and testing datasets.

In [None]:

# Hold out 20% of the rows for testing. `stratify=y` asks for the same
# normal/anomaly proportions in both partitions, and the fixed seed makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)
    

## Step 4: Train Random Forest Model
Initialize and fit the classifier.

In [None]:

# Build a forest of 100 trees with no depth limit (max_depth=None), a fixed
# seed for repeatable results, and n_jobs=-1 to use all available CPU cores.
rf_classifier = RandomForestClassifier(
    random_state=42,
    n_jobs=-1,
    max_depth=None,
    n_estimators=100,
)

# Fit the forest on the training partition only.
rf_classifier.fit(X_train, y_train)
    

## Step 5: Evaluate the Model
Predict and print classification metrics.

In [None]:

# Label the held-out rows with the trained forest.
y_pred = rf_classifier.predict(X_test)

# Emit a heading followed by the per-class metrics table, formatted to four
# decimal places.
print(
    "Classification Report:",
    classification_report(y_test, y_pred, digits=4),
    sep="\n",
)
    

## Step 6: Save the Model
Save the trained classifier to disk using Joblib.

In [None]:

# Output location: directory (relative to the current working directory) and
# the full path of the serialized model inside it.
SAVE_DIR = "../test_cases/saved_models/"
model_path = os.path.join(SAVE_DIR, "random_forest_anomaly_classifier.pkl")

# Ensure the directory exists (no error if it is already there), then
# serialize the fitted classifier with joblib.
os.makedirs(SAVE_DIR, exist_ok=True)
joblib.dump(rf_classifier, model_path)

print(f"✅ Model saved successfully to: {model_path}")
    