# Feature Importance Analysis for Polymarket Election Markets

This notebook performs a comprehensive feature importance analysis for Polymarket election market data, using three different models:
1. Gradient Boosting Regressor
2. Random Forest Regressor
3. Linear Regression with L1 Regularization (Lasso)

The analysis includes:
- Model-specific importance
- Permutation importance (model-agnostic)
- SHAP values for the best-performing model
- Consensus feature ranking

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib
from datetime import datetime
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.inspection import permutation_importance
import shap
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

# Plotting defaults: apply matplotlib's seaborn-v0_8 whitegrid style sheet
# first, then seaborn's own style and "notebook" context (font_scale=1.2).
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_style("whitegrid")
sns.set_context("notebook", font_scale=1.2)

# Directory names. INPUT_DIR is only defined here; OUTPUT_DIR is created
# immediately (exist_ok=True, so an existing directory is not an error).
INPUT_DIR, OUTPUT_DIR = "modified_analysis", "feature_importance_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"Results will be saved to: {OUTPUT_DIR}")

# Run identifier: current local time rendered as YYYYMMDD_HHMMSS.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
print(f"Analysis started at: {timestamp}")