## Model Evaluation


In [None]:
import pandas as pd
import numpy as np
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

# Set seaborn's global plot theme once, up front: "notebook" context for
# element scaling and the "whitegrid" style for the figures drawn below.
sns.set_theme(context="notebook", style="whitegrid")


In [None]:
# Load the trained model and the held-out test split, failing early with an
# actionable message if either artifact (or the target column) is missing.
model_path = Path("models/random_forest.joblib")
if not model_path.exists():
    raise FileNotFoundError("Missing models/random_forest.joblib. Run train.ipynb first.")

# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model artifacts produced by a trusted source.
rf_model = joblib.load(model_path)

test_path = Path("preprocessed_test.csv.gz")
if not test_path.exists():
    # Mirror the model check so a missing data file is reported just as clearly.
    raise FileNotFoundError(f"Missing {test_path}. Generate the preprocessed test split first.")

# Compression is inferred from the ".gz" extension.
test_df = pd.read_csv(test_path)
if "Cover_Type" not in test_df.columns:
    raise KeyError(f"{test_path} has no 'Cover_Type' target column.")

# Split into the feature matrix and the target vector.
X_test = test_df.drop(columns="Cover_Type")
y_test = test_df["Cover_Type"]


In [None]:
# Predict once; every metric below (and the later cells) reuses y_pred.
y_pred = rf_model.predict(X_test)

# Scorers that are reported under both averaging schemes, in display order.
averaged_scorers = [
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
]

# Accuracy has no averaging mode, so it leads the table on its own.
score_rows = [("Accuracy", accuracy_score(y_test, y_pred))]
for metric_label, scorer in averaged_scorers:
    for averaging in ("weighted", "macro"):
        score_rows.append(
            (f"{metric_label} ({averaging})", scorer(y_test, y_pred, average=averaging))
        )

# One row per metric, rounded to four decimals for display.
metrics_df = pd.DataFrame(score_rows, columns=["Metric", "Score"])
metrics_df["Score"] = metrics_df["Score"].round(4)
metrics_df


In [None]:
# Per-class precision / recall / F1 / support as a tidy table.
report = classification_report(y_test, y_pred, output_dict=True)

# Built as a single chain (no inplace mutation) so the cell is idempotent and
# the displayed frame always has a clean 0..n-1 index after sorting.
per_class_df = (
    pd.DataFrame(report)
    .transpose()
    # Keep only the per-class rows; the aggregate rows are dropped here.
    .drop(index=["accuracy", "macro avg", "weighted avg"])
    .reset_index()
    .rename(columns={"index": "Cover_Type"})
    # Report keys are strings; cast the label back to int so sorting is
    # numeric, and show support as a whole number.
    .astype({"Cover_Type": int, "support": int})
    .round({"precision": 4, "recall": 4, "f1-score": 4})
    .sort_values("Cover_Type", ignore_index=True)
)
per_class_df


In [None]:
# Confusion matrix with axes labelled by the actual class values.
# Without explicit tick labels the heatmap shows positional indices (0..n-1),
# which need not match the class labels themselves.
# np.union1d returns the sorted unique labels seen in either vector, i.e. the
# same set and order confusion_matrix uses by default.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
fig.tight_layout()
plt.show()


In [None]:
# Rank the model's feature importances and plot the largest ones.
# NOTE(review): labelling by X_test.columns assumes the test columns are in the
# same order as the features the model was fitted on — confirm.
importances = pd.Series(rf_model.feature_importances_, index=X_test.columns)

top_n = 15
subset = importances.sort_values(ascending=False).head(top_n)

fig, ax = plt.subplots(figsize=(9, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13, so the
# categorical axis is also assigned to `hue`. dodge=False keeps full-width bars.
sns.barplot(
    x=subset.values,
    y=subset.index,
    hue=subset.index,
    dodge=False,
    orient="h",
    palette="viridis",
    ax=ax,
)
# The hue legend would just repeat the y-axis labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
# Use the real count so the title stays accurate when there are fewer than
# top_n features.
ax.set_title(f"Top {len(subset)} Feature Importances")
fig.tight_layout()
plt.show()


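The tables above report the overall scores (accuracy plus weighted and macro precision, recall, and F1) and the per-class performance, while the plots show where predictions agree with or diverge from the true classes (confusion matrix) and which features the random forest relies on most (feature importances).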
