# Imports

In [7]:
# Core Libraries
from pathlib import Path

import numpy as np
import joblib

# Model
from sklearn.ensemble import RandomForestRegressor

# Evaluation Metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Validation Utilities
from sklearn.utils import check_consistent_length

# Load Preprocessed Data

In [8]:
# Load the pickled train / validation / test splits from a single data directory.
# SECURITY NOTE: joblib.load unpickles arbitrary Python objects, so only point
# DATA_DIR at files you created yourself or otherwise trust.
DATA_DIR = Path(r"D:\Python\SmartSignalAI\data")

X_train = joblib.load(DATA_DIR / "X_train.pkl")
y_train = joblib.load(DATA_DIR / "y_train.pkl")

X_val   = joblib.load(DATA_DIR / "X_val.pkl")
y_val   = joblib.load(DATA_DIR / "y_val.pkl")

X_test  = joblib.load(DATA_DIR / "X_test.pkl")
y_test  = joblib.load(DATA_DIR / "y_test.pkl")

# Fail fast: every feature/target pair must agree on the number of samples.
# check_consistent_length raises ValueError on a mismatch, so the success
# message below is only printed for splits that are at least shape-compatible.
check_consistent_length(X_train, y_train)
check_consistent_length(X_val, y_val)
check_consistent_length(X_test, y_test)

print("Data loaded successfully!")

Data loaded successfully!


# Train FIRST Random Forest

In [9]:
# Baseline hyper-parameters: 100 trees, no depth limit, fixed seed, and
# n_jobs=-1 so scikit-learn uses every available CPU core.
baseline_rf_params = {
    "n_estimators": 100,
    "max_depth": None,
    "random_state": 42,
    "n_jobs": -1,
}

# Build the regressor and fit it on the training split only;
# fit() returns the estimator itself, so rf_model is the fitted forest.
rf_model = RandomForestRegressor(**baseline_rf_params).fit(X_train, y_train)

print("Model training completed.")

Model training completed.


# Validation Evaluation

In [10]:
# Predict on the validation split with the fitted forest
y_val_pred = rf_model.predict(X_val)

# Error metrics: RMSE is the square root of the mean squared error
val_mse  = mean_squared_error(y_val, y_val_pred)
rmse_val = np.sqrt(val_mse)
mae_val  = mean_absolute_error(y_val, y_val_pred)
r2_val   = r2_score(y_val, y_val_pred)

# Report each metric rounded to three decimals
print("Random Forest - Validation Metrics")
val_report = (
    ("MAE :", mae_val),
    ("RMSE:", rmse_val),
    ("R²  :", r2_val),
)
for _val_label, _val_score in val_report:
    print(_val_label, round(_val_score, 3))

# Q: "How did you prevent overfitting?"
# A: By tuning against validation performance, never against the test set.


Random Forest - Validation Metrics
MAE : 4.052
RMSE: 6.275
R²  : 0.67


# Test Evaluation

In [11]:
# Predict on the held-out test split with the fitted forest
y_test_pred = rf_model.predict(X_test)

# Error metrics: RMSE is the square root of the mean squared error
test_mse  = mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(test_mse)
mae_test  = mean_absolute_error(y_test, y_test_pred)
r2_test   = r2_score(y_test, y_test_pred)

# Report each metric rounded to three decimals
print("Random Forest - Test Metrics")
test_report = (
    ("MAE :", mae_test),
    ("RMSE:", rmse_test),
    ("R²  :", r2_test),
)
for _test_label, _test_score in test_report:
    print(_test_label, round(_test_score, 3))

# The test set is scored only once, to report unbiased generalization performance.

# Reading the numbers (recorded run: MAE 4.121, RMSE 6.482, R² 0.669):
# - On average the RSRP prediction is off by about 4.1 dB
#   (assuming the target is RSRP in dBm - TODO confirm against preprocessing).
# - An average error of roughly ±3–5 dB is treated as acceptable here
#   (author's rule of thumb; no source is cited).
# - Large prediction errors are still possible, but they should be rare.
#   RMSE being higher than MAE shows that the model occasionally encounters
#   difficult propagation scenarios, which is expected in real cellular environments.
# - R² > 0.6 is taken as the threshold for a strong model in this project.

Random Forest - Test Metrics
MAE : 4.121
RMSE: 6.482
R²  : 0.669


# Save the RF Model

In [12]:
# Persist the fitted Random Forest to disk with joblib so it can be reloaded later
rf_model_path = r"D:\Python\SmartSignalAI\data\random_forest_model.pkl"
joblib.dump(rf_model, rf_model_path)

print("Random Forest model saved successfully!")

Random Forest model saved successfully!
