# Model testing

### Imports

In [1]:
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import os

### Variables

In [2]:
# Column holding the regression target.
target_col = "movie_score"

# Columns excluded from the feature matrix: the target itself plus four others.
drop_cols = [
    target_col,
    "averageRating",
    "numVotes",
    "Unnamed: 0",
    "_orig_order",
]

### Retrieve data

In [None]:
# Test set lives under ./data, relative to the current working directory.
testDatasetPath = os.path.join(os.curdir, "data", "test_dataset.csv")

# Model previously serialized with joblib; path is relative to the working directory.
final_model = joblib.load("random_forest_reg_movie_log_transformed.joblib")

# The test CSV is semicolon-separated.
df_test = pd.read_csv(testDatasetPath, sep=";")


### Prepare test data

In [None]:
# Split the test frame into ground truth and features.
#
# The target column may be missing from the test set. In that case the
# ground-truth variables are set to None, so that the `y_true is not None`
# checks in later cells can take their "no ground truth" path instead of this
# cell failing with a KeyError.
if target_col in df_test.columns:
    y_true = df_test[target_col].to_numpy()
    # expm1 is the inverse of log1p; the result is what this notebook treats
    # as the original-scale score.
    y_true_original = np.expm1(y_true)
else:
    y_true = None
    y_true_original = None

# Remove the non-feature columns; names that are absent are silently skipped.
X_test = df_test.drop(columns=drop_cols, errors="ignore")



### Create prediction on test

In [None]:
# Predict
# Raw model output; presumably on the log1p scale the model was trained on
# (the model file name says "log_transformed") - confirm against training code.
y_pred = final_model.predict(X_test)
# Back-transform with expm1 (inverse of log1p) to get original-scale predictions.
y_pred_original = np.expm1(y_pred)

### Calculate MAE, RMSE and R^2

In [None]:
# Metrics (only if ground truth available)
if y_true is not None:
    # MAE and RMSE are computed on the back-transformed (expm1) values.
    mae = mean_absolute_error(y_true_original, y_pred_original)
    rmse = np.sqrt(mean_squared_error(y_true_original, y_pred_original))
    # `r2` is computed on the untransformed model outputs (log scale), which is
    # a different scale from MAE/RMSE above. `r2_original` is the same score on
    # the back-transformed values so all three error metrics can be compared.
    r2 = r2_score(y_true, y_pred)
    r2_original = r2_score(y_true_original, y_pred_original)
    print(f"MAE:  {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    # Label the scale explicitly; the two R2 values can differ substantially.
    print(f"R2:   {r2:.4f} (log scale)")
    print(f"R2:   {r2_original:.4f} (original scale)")
else:
    print("Ground-truth 'movie_score' not found in test set; only predictions are available.")


## Bin testing

In [None]:
# Edges of the score bins; the last bin is unbounded above.
bin_edges = [0, 10, 50, 100, 500, 1000, 5000, 10000, 20000, 40000, np.inf]

# One label per pair of consecutive edges: "low-high" for bounded bins and
# "low+" for the open-ended last bin.
bin_labels = [
    f"{low}+" if high == np.inf else f"{low}-{high}"
    for low, high in zip(bin_edges[:-1], bin_edges[1:])
]

# Evaluation frame holding the back-transformed truth and predictions side by side.
eval_df = pd.DataFrame({"y_true": y_true_original, "y_pred": y_pred_original})

# Assign every row to a bin according to its TRUE score.
# right=False makes each interval closed on the left and open on the right.
true_score_bins = pd.cut(
    eval_df["y_true"],
    bins=bin_edges,
    labels=bin_labels,
    right=False,
    include_lowest=True,
)
eval_df["score_bin"] = true_score_bins

def metrics_for_slice(y_t, y_p):
    """Summarise prediction error for one slice of the evaluation data.

    Parameters
    ----------
    y_t : array
        True values of the slice.
    y_p : array
        Predicted values, aligned element-wise with ``y_t``. Both inputs must
        support element-wise subtraction (e.g. NumPy arrays).

    Returns
    -------
    dict
        Keys ``n`` (slice size), ``mae``, ``rmse``, ``mean_true``,
        ``mean_pred`` and ``median_abs_err``.
    """
    absolute_errors = np.abs(y_p - y_t)

    summary = {"n": len(y_t)}
    summary["mae"] = mean_absolute_error(y_t, y_p)
    summary["rmse"] = np.sqrt(mean_squared_error(y_t, y_p))
    summary["mean_true"] = float(np.mean(y_t))
    summary["mean_pred"] = float(np.mean(y_p))
    summary["median_abs_err"] = float(np.median(absolute_errors))
    return summary

# Per-bin error metrics, grouped by the TRUE-score bin.
#
# Rows whose score fell outside every bin (NaN bin) are dropped first.
# The value columns are selected explicitly before `apply`, so the function
# never operates on the grouping column itself (that implicit behaviour is
# deprecated in recent pandas releases and emits a warning).
bin_results = (
    eval_df.dropna(subset=["score_bin"])
           .groupby("score_bin", observed=True)[["y_true", "y_pred"]]
           .apply(lambda g: pd.Series(metrics_for_slice(g["y_true"].values, g["y_pred"].values)))
           .reset_index()
)

# Wrapping the metrics dict in a Series upcasts the row count to float;
# restore it to an integer so it prints as "12" rather than "12.0".
# The column is absent when no row landed in any bin.
if "n" in bin_results.columns:
    bin_results["n"] = bin_results["n"].astype(int)

print("\nPer-bin performance (binned by TRUE movie_score):")
print(bin_results.to_string(index=False))


In [None]:
# Save the per-bin metrics (binned by true score) to CSV, omitting the index column.
bin_results.to_csv("bin_results.csv", index=False)

In [None]:
# Assign every row to a bin according to its PREDICTED score, using the same
# left-closed, right-open intervals as the true-score binning.
eval_df["pred_bin"] = pd.cut(
    eval_df["y_pred"],
    bins=bin_edges,
    labels=bin_labels,
    right=False,
    include_lowest=True
)

# Per-bin error metrics, grouped by the PREDICTED-score bin.
# The value columns are selected explicitly before `apply`, so the function
# never operates on the grouping column itself (that implicit behaviour is
# deprecated in recent pandas releases and emits a warning).
pred_bin_results = (
    eval_df.dropna(subset=["pred_bin"])
           .groupby("pred_bin", observed=True)[["y_true", "y_pred"]]
           .apply(lambda g: pd.Series(metrics_for_slice(g["y_true"].values, g["y_pred"].values)))
           .reset_index()
)

# Wrapping the metrics dict in a Series upcasts the row count to float;
# restore it to an integer. The column is absent when no row landed in any bin.
if "n" in pred_bin_results.columns:
    pred_bin_results["n"] = pred_bin_results["n"].astype(int)

print("\nPer-bin performance (binned by PREDICTED movie_score):")
print(pred_bin_results.to_string(index=False))


In [None]:
# Save the per-bin metrics (binned by predicted score) to CSV, omitting the index column.
pred_bin_results.to_csv('performance_binned_by_predicted.csv', index=False)

### Create a preview dataframe

In [None]:
# Preview predictions
# Values are the untransformed model outputs / targets (not back-transformed).
preview = pd.DataFrame({"predicted_movie_score": y_pred})
if y_true is not None:
    # Both ground-truth-dependent columns are added inside the guard, so the
    # preview can still be built when no ground truth is available.
    # Resulting column order: actual, predicted, residual.
    preview.insert(0, "actual_movie_score", y_true)
    preview["residual"] = y_pred - y_true


### Create a dataframe of feature importance

In [None]:
# Pair each feature column with the importance reported by the fitted model.
feature_names = X_test.columns
importances = final_model.feature_importances_

# Table of features ordered from most to least important.
importance_df = pd.DataFrame({"feature": feature_names, "importance": importances})
importance_df = importance_df.sort_values(by="importance", ascending=False)

# Show the ten most important features.
print(importance_df.head(10))

In [None]:
# Horizontal bar chart of the 20 most important features.
top_features = importance_df.head(20)
bar_positions = range(len(top_features))

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(bar_positions, top_features['importance'])
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_features['feature'])
ax.set_xlabel('Feature Importance')
ax.set_title('Top 20 Feature Importances - Random Forest')
# Flip the y-axis so the most important feature is drawn at the top.
ax.invert_yaxis()
fig.tight_layout()
plt.show()


### Residual plot

In [None]:
# Residuals of the untransformed model outputs (prediction minus truth).
residuals = y_pred - y_true

# Scatter of residual against the true value; the horizontal line marks zero error.
fig, ax = plt.subplots()
ax.scatter(y_true, residuals, alpha=0.3)
ax.axhline(0)
ax.set_xlabel("True popularity")
ax.set_ylabel("Residual (pred - true)")
ax.set_title("Residuals vs true popularity")
plt.show()


### Bias plot

In [None]:
# Prediction bias as a log-ratio on the ORIGINAL score scale.
#
# y_pred and y_true are treated as log1p-scale values throughout this notebook
# (they are back-transformed with np.expm1 elsewhere), so
#     y_pred - y_true == ln((pred + 1) / (true + 1)).
# Dividing the log-scale values by each other instead would not give the
# ratio named on the axis, and a zero target would be divided by the epsilon,
# producing outliers around 1e9. Working from the difference needs no division
# and stays finite when a score is 0.
log_diff = y_pred - y_true
ratio = np.exp(log_diff)            # (pred + 1) / (true + 1)
log_ratio = log_diff / np.log(10)   # log10 of the ratio above

plt.figure()
# plt.hist raises on NaN/inf, so drop any non-finite entries defensively.
plt.hist(log_ratio[np.isfinite(log_ratio)], bins=50)
plt.axvline(0)  # log10(1): prediction equals truth
plt.xlabel("log10((pred + 1) / (true + 1))")
plt.title("Prediction bias distribution")
plt.show()

