# Training RandomForest Model

In [1]:
# Import Modules (RandomForest from sklearn)
import os
import json
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib 

In [2]:
# --- Filepaths ---
# Both paths are relative ("./..."), so they resolve against the directory the
# notebook kernel is running in.

# Directory handed to load_logs(); its *.json files are the training data.
LOGS_FOLDER = "./training_logs"
# File the fitted classifier is written to with joblib.dump().
MODEL_SAVE_PATH = "./trained_models/random_forest_model.pkl"

In [3]:
# --- Prep Data ---
def load_logs(logs_folder):
    """Build a feature matrix and a label vector from JSON training logs.

    Every file in ``logs_folder`` whose name ends in ``.json`` is parsed and
    iterated entry by entry. Each entry contributes one sample:

    * features: the coordinates in ``entry["position"]``, followed by
      ``entry["num_reported_tokens"]`` and the mean Euclidean distance between
      the position and each element of ``entry["reported_tokens"]`` (0 when
      that list is empty);
    * label: ``entry["is_byzantine"]``.

    Files are visited in sorted filename order. ``os.listdir`` makes no
    ordering guarantee, and the row order of the returned arrays feeds any
    seeded shuffle/split done downstream, so a fixed order is needed for the
    results to be reproducible across machines and filesystems.

    Args:
        logs_folder: Path of the directory that contains the log files.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one row of
        ``features`` and one element of ``labels`` per log entry.

    Raises:
        OSError: If ``logs_folder`` cannot be listed or a file cannot be read.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
        KeyError: If an entry lacks one of the keys listed above.
    """
    features = []
    labels = []
    for filename in sorted(os.listdir(logs_folder)):
        if not filename.endswith('.json'):
            continue
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(os.path.join(logs_folder, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)
        # The file is fully parsed at this point, so the handle is already
        # closed while the entries are turned into feature rows.
        for entry in data:
            pos = np.array(entry["position"])
            tokens = entry["reported_tokens"]
            num_tokens = entry["num_reported_tokens"]
            # Guard the empty case explicitly: np.mean([]) would yield NaN.
            avg_distance = np.mean([
                np.linalg.norm(pos - np.array(token))
                for token in tokens
            ]) if tokens else 0
            features.append(list(pos) + [num_tokens, avg_distance])
            labels.append(entry["is_byzantine"])
    return np.array(features), np.array(labels)

# Build the feature matrix X and the label vector y from the logs in LOGS_FOLDER.
X, y = load_logs(LOGS_FOLDER)
# Quick sanity check: len(X) is the number of feature rows that were loaded.
print(f"Loaded {len(X)} samples.")

Loaded 100000 samples.


In [4]:
# Fit the RandomForest and save the model

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical from run to run for the same X / y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Per-class precision / recall / F1 on the held-out samples.
preds = clf.predict(X_test)
print(classification_report(y_test, preds))

# Create the directory that MODEL_SAVE_PATH actually points into, instead of a
# separately hard-coded folder name, so the two can never drift apart.
model_dir = os.path.dirname(MODEL_SAVE_PATH)
if model_dir:  # empty when the path is a bare filename in the working directory
    os.makedirs(model_dir, exist_ok=True)
joblib.dump(clf, MODEL_SAVE_PATH)
print(f"Model saved to {MODEL_SAVE_PATH}")

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     14576
           1       1.00      1.00      1.00      5424

    accuracy                           1.00     20000
   macro avg       1.00      1.00      1.00     20000
weighted avg       1.00      1.00      1.00     20000

Model saved to random_forest_model.pkl
