In [1]:
import json
import joblib
import numpy as np
from catboost import CatBoostRegressor

# Compute normalised feature importances for the Ridge model, the CatBoost model and
# their simple average, then save all three to JSON and print the ensemble ranking.

# Load models and feature names
# NOTE: joblib.load unpickles the file, which can execute arbitrary code; only load
# model artifacts from a trusted source.
ridge = joblib.load('Models/baseline_model_v7/ridge_v7.joblib')
catboost = CatBoostRegressor()
catboost.load_model('Models/baseline_model_v7/catboost_v7.cbm')

with open('Models/baseline_model_v7/feature_cols_v7.json', 'r') as f:
    feature_cols = json.load(f)


def _normalized_importance(raw_scores, n_features, model_name):
    """Flatten `raw_scores` and rescale them so that they sum to 1.

    Args:
        raw_scores: array-like of non-negative importance scores for one model.
        n_features: number of feature names the scores will be paired with.
        model_name: label used in error messages.

    Returns:
        1-D float ndarray of length `n_features` whose entries sum to 1.

    Raises:
        ValueError: if the flattened scores do not contain exactly `n_features`
            values (pairing them with names via zip would silently truncate), or
            if they sum to zero (normalising would produce NaN).
    """
    scores = np.ravel(np.asarray(raw_scores, dtype=float))
    if scores.shape[0] != n_features:
        raise ValueError(
            f"{model_name}: got {scores.shape[0]} importance values "
            f"for {n_features} feature names"
        )
    total = scores.sum()
    if total == 0:
        raise ValueError(f"{model_name}: importance values sum to zero, cannot normalise")
    return scores / total


# Get feature importances
# coef_ is flattened because scikit-learn stores it as (n_targets, n_features) when the
# model was fit on a 2-D target; a (1, n) array would otherwise broadcast and end up as
# a nested list in the JSON.
# NOTE(review): absolute coefficients are only comparable across features if the inputs
# were on a common scale when Ridge was fit - confirm against the training code.
ridge_coef = np.abs(np.ravel(ridge.coef_))
ridge_importance = _normalized_importance(ridge_coef, len(feature_cols), "Ridge")

# NOTE(review): assumes the order of feature_cols matches the column order both models
# were trained with - verify against the training code.
catboost_importance = _normalized_importance(
    catboost.get_feature_importance(), len(feature_cols), "CatBoost"
)

# Average for ensemble
ensemble_importance = (ridge_importance + catboost_importance) / 2

# Create output dictionary
output = {
    "model": "Avg (Ridge+CB)",
    "mae": 198.15,  # hard-coded metric; not computed in this cell
    "feature_importance": dict(zip(feature_cols, ensemble_importance.tolist())),
    "ridge_importance": dict(zip(feature_cols, ridge_importance.tolist())),
    "catboost_importance": dict(zip(feature_cols, catboost_importance.tolist()))
}

# Save to JSON
with open('Models/baseline_model_v7/feature_importance_v7.json', 'w') as f:
    json.dump(output, f, indent=2)

print("Saved to Models/baseline_model_v7/feature_importance_v7.json")
print("\nTop features (ensemble):")
# Descending by ensemble importance; every feature is listed, not just a top-k subset.
sorted_features = sorted(output["feature_importance"].items(), key=lambda x: x[1], reverse=True)
for feat, imp in sorted_features:
    print(f"  {feat}: {imp:.4f}")

Saved to Models/baseline_model_v7/feature_importance_v7.json

Top features (ensemble):
  is_dec_holiday: 0.1477
  opponent_attendance: 0.1263
  month_sin: 0.1150
  game_progress: 0.1011
  sunday_opp_adj: 0.0934
  spieltag: 0.0724
  hour: 0.0714
  weekday_cos: 0.0562
  sunday_boost: 0.0479
  is_top_opponent: 0.0459
  weekday_sin: 0.0377
  distance_log: 0.0325
  holiday_score: 0.0306
  sunday_top: 0.0220


In [2]:
# Write a second JSON file whose "feature_importance" entry follows the descending
# order held in `sorted_features`; every other entry is copied from `output` as-is.
sorted_json_path = 'Models/baseline_model_v7/feature_importance_sorted_v7.json'
payload_keys = ("model", "mae", "feature_importance", "ridge_importance", "catboost_importance")

with open(sorted_json_path, 'w') as f:
    # Only the ensemble importances are re-ordered; dict() keeps the list's order.
    sorted_payload = {
        key: dict(sorted_features) if key == "feature_importance" else output[key]
        for key in payload_keys
    }
    json.dump(sorted_payload, f, indent=2)