In [1]:
import os
import pandas as pd
import numpy as np
import shap
import xgboost as xgb
from sqlalchemy import create_engine

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# 2. Connect to Postgres and load the wtg_features table
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://wind_user:windy@localhost:5432/wind_db")
engine = create_engine(DATABASE_URL)
df = pd.read_sql_table("wtg_features", con=engine)

In [15]:
# 2. Drop any text/object columns
for col in ["nor_1", "nor_2", "remarks"]:
    if col in df.columns:
        df.drop(columns=col, inplace=True)

In [16]:
# 3. Build your binary target
df = df.sort_values(["turbine_id", "log_date"])
df["will_fault_occur"] = (
    df.groupby("turbine_id")["downtime_hrs"]
      .shift(-1).fillna(0).gt(0).astype(int)
)
df.dropna(subset=["will_fault_occur"], inplace=True)

In [17]:
# 4. Select only numeric features
exclude = {"dgr_id_no","log_date","turbine_id","will_fault_occur"}
feature_cols = [
    c for c in df.select_dtypes(include=["number","bool"]).columns
    if c not in exclude
]
X = df[feature_cols]

In [18]:
# 5. Load your final model
model = xgb.Booster()
model.load_model("models/xgb_fault_classifier_final.json")

In [19]:
# 6. Compute SHAP values
explainer = shap.TreeExplainer(model, feature_perturbation="tree_path_dependent")
shap_exp = explainer(X)             # returns an Explanation
shap_vals = shap_exp.values         # numpy array shape=(n_samples, n_features)

In [20]:
# 7. Global importance
global_imp = np.abs(shap_vals).mean(axis=0)
global_imp_df = pd.DataFrame({
    "feature": feature_cols,
    "importance": global_imp
}).sort_values("importance", ascending=False)

print(global_imp_df.head(10))
# Optionally save:
global_imp_df.to_csv("global_shap_importance.csv", index=False)

           feature  importance
13            mtbf    0.153898
9     downtime_hrs    0.100782
0        gen_units    0.089622
10    availability    0.081679
4       fault_time    0.057813
12            mttr    0.050854
2   avg_wind_speed    0.044669
3         lull_hrs    0.028308
11     plf_percent    0.024454
7    ext_grid_down    0.018875
