In [13]:
# Librerias necesarias
import os, joblib, json
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from lime.lime_tabular import LimeTabularExplainer
from sklearn.cluster import KMeans 
import shap
from datetime import datetime
from PIL import Image
import pyfiglet
import tqdm 


In [14]:
# ====== CONFIG: locations of the data and the model ======
# Paths to the fitted model and to the data files.
# NOTE(review): these are absolute, machine-specific paths; the notebook only
# runs unchanged on a machine with this exact folder layout.
MODEL_PATH = r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\code_modelo\svm_rbf_pipeline.joblib" # SVM RBF model
# According to the original author, this model gave the best training results.
SELECTED_COLS_PATH = r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\selected_columns.txt" # selected feature columns
# Test CSV holding the extracted features.
TEST_CSV_PATH = r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\features_test_curado.csv" # test CSV with the extracted features
# Directories searched (in this order) when looking up an image by file name.
IMG_SEARCH_DIRS = [
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\train",
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\val",
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\test",
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\no_etiquetadas"
]
# Output directory for the explanations.
# It is created further down if it does not exist.
OUTPUT_DIR = r"./explicabilidad" # local folder where the explanations are saved
# ===================================================
SAMPLE_IDX = 0  # row index (in the test CSV) of the example to explain
# NOTE(review): N_BACKGROUND is not read by the code visible in this cell; the
# SHAP background below is built from min(20, n_rows) k-means centres instead.
N_BACKGROUND = 100 # number of background samples for SHAP
# Number of features shown in the LIME and SHAP plots.
N_FEATURES_SHOW = 12
# =====================================

# --- ADICIÓN ---
import os, joblib, json, re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from lime.lime_tabular import LimeTabularExplainer
import shap
from sklearn.cluster import KMeans
from datetime import datetime
from PIL import Image
import pyfiglet

# Make sure the output folder exists before anything is written into it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# 1) Load the fitted pipeline and the selected feature columns
#    (one column name per line; blank lines are skipped).
# NOTE(review): joblib.load unpickles the file, so only load trusted models.
pipe = joblib.load(MODEL_PATH)
with open(SELECTED_COLS_PATH, "r", encoding="utf-8") as f:
    selected_cols = [name for name in map(str.strip, f) if name]

# 2) Load the test CSV and keep only the selected columns as model input.
df_test = pd.read_csv(TEST_CSV_PATH)
X_test = df_test.loc[:, selected_cols].copy()
# Ground-truth labels are optional; the "file" column is required.
y_test = None if "label" not in df_test.columns else df_test["label"]
file_names = df_test["file"].tolist()

# float32 array view of the features (the original author's note: avoids
# warnings and lowers RAM usage).
X_test_arr = X_test.to_numpy().astype(np.float32)

# --- FRIENDLY NAMES ---
# HOG layout as stated by the original author for the extraction script:
# 1080x720 images, 6 orientations, 60x60 pixels per cell, 2x2 cells per block
# -> 18x12 cells, 17x11 blocks, 24 features per block = 4488 features.
HOG_ORI = 6
CELLS_X = 18  # 1080/60
CELLS_Y = 12  # 720/60
BLOCK_CX = 2  # cells per block along x
BLOCK_CY = 2  # cells per block along y
NBLOCKS_X = CELLS_X - BLOCK_CX + 1  # 17
NBLOCKS_Y = CELLS_Y - BLOCK_CY + 1  # 11

def build_hog_feature_names(orientations=HOG_ORI):
    """
    Build one readable label per HOG feature.

    Labels are emitted block row by block row, then block column, then cell
    row / cell column inside the block, then orientation bin. The original
    author states this mirrors skimage.feature.hog(feature_vector=True).

    Each label carries the start angle of the orientation bin (bins split
    [0, 180) evenly) and the absolute (row, column) of the cell. Because
    neighbouring blocks overlap, the same cell/angle label is produced more
    than once, so labels are NOT unique.

    Returns a list of NBLOCKS_Y * NBLOCKS_X * BLOCK_CY * BLOCK_CX *
    orientations strings (4488 with the defaults).
    """
    angles = [int(round(180 / orientations * k)) for k in range(orientations)]
    return [
        f"HOG θ≈{angle}° @ celda ({block_row + d_row},{block_col + d_col})"
        for block_row in range(NBLOCKS_Y)
        for block_col in range(NBLOCKS_X)
        for d_row in range(BLOCK_CY)
        for d_col in range(BLOCK_CX)
        for angle in angles
    ]

# HSV histogram layout assumed by the labels below (per the original author's
# note about the extractor): 16 hue bins followed by 12 saturation bins.
HSV_BINS_H, HSV_BINS_S = 16, 12

def pretty_hsv_name(i):
    """
    Return a readable label for the i-th HSV histogram feature.

    Indices 0..HSV_BINS_H-1 are labelled as hue ranges over [0, 180);
    the next HSV_BINS_S indices are labelled as saturation ranges over
    [0, 256). Range bounds are truncated to integers.

    Any other index (negative, or past the last saturation bin) gets the
    generic label "HSV <i>" instead of a made-up range outside the valid
    hue/saturation domain.
    """
    if 0 <= i < HSV_BINS_H:
        width = 180 / HSV_BINS_H
        lo = int(i * width); hi = int((i + 1) * width)
        return f"Hue[{lo}–{hi}°]"
    j = i - HSV_BINS_H
    if 0 <= j < HSV_BINS_S:
        width = 256 / HSV_BINS_S
        lo = int(j * width); hi = int((j + 1) * width)
        return f"Saturación[{lo}–{hi}]"
    # Index outside the 16 + 12 layout: do not invent a range for it.
    return f"HSV {i}"

# LBP (uniform, 10 bins) -- per the original author's note.
def pretty_lbp_name(i):
    """Return the display label for the i-th LBP histogram bin."""
    return "LBP bin {}".format(i)

# GLCM property names, in the order the original author says the extractor
# emits them.
GLCM_PROPS = ["contrast", "dissimilarity", "homogeneity", "ASM", "energy", "correlation"]
def pretty_glcm_name(i):
    """
    Return the display label for the i-th GLCM feature.

    Indices inside GLCM_PROPS map to the property name; any other index is
    shown as the raw number.
    """
    in_range = 0 <= i < len(GLCM_PROPS)
    label = GLCM_PROPS[i] if in_range else i
    return f"GLCM {label}"

def make_friendly_names(cols):
    """
    Translate raw feature column names into readable labels.

    Recognised families are "hog_<k>", "hsv_<k>", "lbp_<k>" and "glcm_<k>"
    (k being an integer index), plus the scalar columns "edge_density" and
    "laplacian_var". Everything else is humanised: underscores become
    spaces and the first letter is capitalised.

    A HOG index outside the generated name table is labelled plain "HOG".
    A family-prefixed column whose suffix is not an integer (for example
    "glcm_contrast") is humanised as well, instead of raising ValueError.

    Returns a list of labels, one per entry of `cols`, in the same order.
    """
    hog_names = build_hog_feature_names(HOG_ORI)

    def _humanize(col):
        # Generic fallback label.
        return re.sub(r"_+", " ", col).strip().capitalize()

    def _suffix_index(col):
        # Integer after the first underscore, or None when it is not numeric.
        try:
            return int(col.split("_", 1)[1])
        except ValueError:
            return None

    # Families whose label is fully determined by the numeric suffix.
    indexed_families = (
        ("hsv_", pretty_hsv_name),
        ("lbp_", pretty_lbp_name),
        ("glcm_", pretty_glcm_name),
    )

    out = []
    for c in cols:
        if c.startswith("hog_"):
            k = _suffix_index(c)
            if k is None:
                out.append(_humanize(c))
            elif 0 <= k < len(hog_names):
                out.append(hog_names[k])
            else:
                out.append("HOG")
            continue

        for prefix, namer in indexed_families:
            if c.startswith(prefix):
                k = _suffix_index(c)
                out.append(namer(k) if k is not None else _humanize(c))
                break
        else:
            if c == "edge_density":
                out.append("Densidad de bordes")
            elif c == "laplacian_var":
                out.append("Varianza del Laplaciano (enfoque)")
            else:
                # Default: humanise the raw column name.
                out.append(_humanize(c))
    return out

# Readable labels aligned one-to-one with selected_cols.
friendly_cols = make_friendly_names(selected_cols)

# 3) Look up the full path of an image from its file name.
def find_image_path(filename):
    """
    Return the first existing path for `filename` under IMG_SEARCH_DIRS.

    Directories are tried in list order; None is returned when the file is
    not present in any of them.
    """
    candidates = (os.path.join(folder, filename) for folder in IMG_SEARCH_DIRS)
    return next((path for path in candidates if os.path.exists(path)), None)

# 4) Quick predictions -- feed the float32 array to the pipeline.
# `probas` holds column 1 of predict_proba, or None when the pipeline does
# not expose predict_proba.
probas = pipe.predict_proba(X_test_arr)[:, 1] if hasattr(pipe, "predict_proba") else None
preds = pipe.predict(X_test_arr)

# 5) LIME (array input, friendly feature names).
# The explainer's feature statistics are computed from the test matrix itself.
# NOTE(review): the later cell of this notebook labels the classes as
# ["danada", "sana"] ("respects the dataset 0/1 order"), which contradicts the
# order used here -- confirm which label belongs to class index 0.
explainer = LimeTabularExplainer(
    training_data=X_test_arr,
    feature_names=friendly_cols,  # <<< friendly names
    class_names=["sana", "defectuosa"],
    discretize_continuous=True,
    mode="classification",
    random_state=42,  # seed LIME's sampling so the explanation is reproducible
)

# --- ADDITION --- probability function handed to LIME
def _predict_fn_lime(x):
    """
    Return class probabilities for the rows of `x`, shaped (n_rows, n_classes).

    Uses the pipeline's predict_proba when available. Otherwise the
    decision_function scores are squashed into pseudo-probabilities:
    a 1-D score vector goes through the logistic function and is returned as
    [1 - p, p]; a 2-D score matrix goes through a row-wise softmax so that
    every row is non-negative and sums to 1 (raw scores are never returned
    as if they were probabilities).
    """
    X_arr = np.asarray(x, dtype=np.float32)
    if hasattr(pipe, "predict_proba"):
        return pipe.predict_proba(X_arr)
    from scipy.special import expit, softmax
    s = np.asarray(pipe.decision_function(X_arr))
    if s.ndim == 1:
        p1 = expit(s)
        return np.vstack([1 - p1, p1]).T
    # One score per class: normalise each row into a distribution.
    return softmax(s, axis=1)

# Explain the single row SAMPLE_IDX, asking LIME for N_FEATURES_SHOW features.
exp = explainer.explain_instance(
    data_row=X_test_arr[SAMPLE_IDX],
    predict_fn=_predict_fn_lime,  # --- ADDITION ---
    num_features=N_FEATURES_SHOW
)

# Timestamp shared by every output file name written from here on.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
lime_html = os.path.join(OUTPUT_DIR, f"lime_idx{SAMPLE_IDX}_{ts}.html")
exp.save_to_file(lime_html)

# LIME bar chart (labels come from LIME, which was given the friendly names).
weights = exp.as_list()
# Split the (label, weight) pairs into two parallel tuples.
# NOTE(review): this unpacking raises if `weights` is empty.
feat_names, feat_vals = zip(*weights)
plt.figure(figsize=(6, 4))
plt.barh(range(len(feat_vals)), feat_vals)
plt.yticks(range(len(feat_vals)), feat_names)
plt.title(f"LIME - idx {SAMPLE_IDX}")
plt.tight_layout()
lime_bar_path = os.path.join(OUTPUT_DIR, f"lime_bar_idx{SAMPLE_IDX}_{ts}.png")
plt.savefig(lime_bar_path, dpi=200)
plt.close()

# 6) Original image next to the LIME weights.
img_path = find_image_path(file_names[SAMPLE_IDX])
# Always defined, so later cells never see a missing or stale value when the
# image cannot be located.
combined_path = None
if img_path:
    plt.figure(figsize=(9, 4))
    plt.subplot(1, 2, 1)
    # Context manager guarantees the image file handle is released.
    with Image.open(img_path) as img:
        plt.imshow(img)
    plt.axis("off")
    plt.title("Imagen original")

    plt.subplot(1, 2, 2)
    plt.barh(range(len(feat_vals)), feat_vals)
    plt.yticks(range(len(feat_vals)), feat_names)
    plt.title("LIME pesos")
    plt.tight_layout()
    combined_path = os.path.join(OUTPUT_DIR, f"lime_image_idx{SAMPLE_IDX}_{ts}.png")
    plt.savefig(combined_path, dpi=200)
    plt.close()

# 7) Kernel SHAP -- background built with sklearn KMeans; the positive-class
#    extraction helpers are defined further down.
# Seeded generator, used later to draw the subset for the global summary.
rng = np.random.default_rng(42)
bg_n = min(20, len(X_test_arr))  # 20 centres; the author suggests lowering to 10 if needed
kmeans = KMeans(n_clusters=bg_n, random_state=42, n_init="auto")
# The cluster centres (cast to float32) serve as the SHAP background data.
# NOTE(review): clustering runs on the raw feature array as loaded from the CSV.
centers = kmeans.fit(X_test_arr).cluster_centers_.astype(np.float32)

def predict_proba_local(X):
    """
    Return class probabilities for the rows of `X`, shaped (n_rows, n_classes).

    Uses the pipeline's predict_proba when available. Otherwise the
    decision_function scores are turned into pseudo-probabilities: a 1-D
    score vector goes through the logistic function and is returned as
    [1 - p, p]; a 2-D score matrix goes through a row-wise softmax. Raw
    scores are never returned, because the explainer below applies a logit
    link to this function's output.
    """
    X_arr = np.asarray(X, dtype=np.float32)
    if hasattr(pipe, "predict_proba"):
        return pipe.predict_proba(X_arr)
    from scipy.special import expit, softmax
    s = np.asarray(pipe.decision_function(X_arr))
    if s.ndim == 1:
        p1 = expit(s)
        return np.vstack([1 - p1, p1]).T
    # One score per class: normalise each row into a distribution.
    return softmax(s, axis=1)

# Kernel SHAP explainer over the k-means centres, with a logit link.
kernel_explainer = shap.KernelExplainer(predict_proba_local, centers, link="logit")

# --- ADICIÓN --- helpers para obtener SIEMPRE la clase positiva
def _sv_pos(shap_values, class_index=1):
    if isinstance(shap_values, list):
        return np.asarray(shap_values[class_index])[0]
    arr = np.asarray(shap_values)
    if arr.ndim == 3:
        return arr[0, :, class_index]
    elif arr.ndim == 2:
        return arr[0, :]
    else:
        raise ValueError(f"Forma inesperada de shap_values: {arr.shape}")

def _sv_matrix_pos(shap_values, class_index=1):
    if isinstance(shap_values, list):
        return np.asarray(shap_values[class_index])
    arr = np.asarray(shap_values)
    if arr.ndim == 3:
        return arr[:, :, class_index]
    elif arr.ndim == 2:
        return arr
    else:
        raise ValueError(f"Forma inesperada de shap_values: {arr.shape}")

# Local explanation (a single row) with a capped number of SHAP samples.
shap_values_local = kernel_explainer.shap_values(X_test_arr[[SAMPLE_IDX]], nsamples=400)
sv_local_pos = _sv_pos(shap_values_local, class_index=1)
# Per-feature contributions sorted by absolute value, largest first.
# NOTE(review): `contrib` is not used again in the code visible in this cell.
contrib = pd.Series(sv_local_pos, index=friendly_cols).sort_values(key=np.abs, ascending=False)

# Global SHAP summary: random subset of at most 200 rows, capped nsamples.
subset_idx = rng.choice(len(X_test_arr), size=min(200, len(X_test_arr)), replace=False)
X_subset = X_test_arr[subset_idx]
shap_values_subset = kernel_explainer.shap_values(X_subset, nsamples=300)
sv_subset_pos = _sv_matrix_pos(shap_values_subset, class_index=1)

# SHAP plots (global) with friendly names: bar summary first.
plt.figure()
shap.summary_plot(sv_subset_pos, X_subset, feature_names=friendly_cols, show=False, plot_type="bar", max_display=N_FEATURES_SHOW)
plt.tight_layout()
shap_summary_bar_path = os.path.join(OUTPUT_DIR, f"shap_summary_bar_{ts}.png")
plt.savefig(shap_summary_bar_path, dpi=200, bbox_inches="tight")
plt.close()

# Same data with summary_plot's default plot type (file name says "dot").
plt.figure()
shap.summary_plot(sv_subset_pos, X_subset, feature_names=friendly_cols, show=False, max_display=N_FEATURES_SHOW)
plt.tight_layout()
shap_summary_dot_path = os.path.join(OUTPUT_DIR, f"shap_summary_dot_{ts}.png")
plt.savefig(shap_summary_dot_path, dpi=200, bbox_inches="tight")
plt.close()

# --- FRIENDLY NAMES --- legend (raw column id -> readable label) for slides
pd.DataFrame({"feature_id": selected_cols, "feature_name": friendly_cols}) \
    .to_csv(os.path.join(OUTPUT_DIR, f"feature_legend_{ts}.csv"), index=False, encoding="utf-8")

# 8) Save run metadata: inputs, prediction for the sample, output file paths
#    and the SHAP / HOG settings used above.
# NOTE(review): the nsamples values and the k-means size are repeated here as
# literals; keep them in sync with the calls above by hand.
meta = {
    "model_path": MODEL_PATH,
    "sample_idx": int(SAMPLE_IDX),
    "pred_sample": int(preds[SAMPLE_IDX]),
    "prob_sample": float(probas[SAMPLE_IDX]) if probas is not None else None,
    "y_true_sample": int(y_test.iloc[SAMPLE_IDX]) if y_test is not None else None,
    "image_path": img_path,
    "lime_html": lime_html,
    "lime_bar_png": lime_bar_path,
    # combined_path is only read when the image was found.
    "lime_combined_png": combined_path if img_path else None,
    "shap_summary_bar_png": shap_summary_bar_path,
    "shap_summary_dot_png": shap_summary_dot_path,
    "bg_method": f"sklearn.KMeans_{bg_n}",
    "shap_nsamples_local": 400,
    "shap_nsamples_global": 300,
    # The tuple is written to JSON as a two-element array.
    "hog_params": {"orientations": HOG_ORI, "cells_x": CELLS_X, "cells_y": CELLS_Y, "cells_per_block": (BLOCK_CY,BLOCK_CX)}
}
with open(os.path.join(OUTPUT_DIR, f"explain_meta_{ts}.json"), "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2, ensure_ascii=False)

# ASCII-art banner followed by the output location.
print(pyfiglet.figlet_format("Explicaciones Generadas", font="digital"))
print("Listo. Explicaciones en:", OUTPUT_DIR)


100%|██████████| 1/1 [00:03<00:00,  3.18s/it]
100%|██████████| 94/94 [03:44<00:00,  2.38s/it]


+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+
|E|x|p|l|i|c|a|c|i|o|n|e|s| |G|e|n|e|r|a|d|a|s|
+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+

Listo. Explicaciones en: ./explicabilidad


In [20]:
# -*- coding: utf-8 -*-
"""
Explicabilidad (LIME/SHAP) con etiquetas HOG legibles y LIME exportado a PNG.
"""

# ====== CONFIG ======
# Same values as the configuration of the earlier cell in this notebook.
# NOTE(review): absolute, machine-specific paths.
MODEL_PATH = r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\code_modelo\svm_rbf_pipeline.joblib"
SELECTED_COLS_PATH = r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\selected_columns.txt"
TEST_CSV_PATH = r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\features_test_curado.csv"

# Directories searched, in this order, when resolving an image file name.
IMG_SEARCH_DIRS = [
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\train",
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\val",
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\test",
    r"C:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\processed_data\data_bin\no_etiquetadas"
]

# Folder that receives every generated file (created below if missing).
OUTPUT_DIR = r"./explicabilidad"

# Row index (in the test CSV) of the example to explain.
SAMPLE_IDX = 0
# NOTE(review): N_BACKGROUND is not read by the code visible in this cell; the
# SHAP background is built from min(20, n_rows) k-means centres instead.
N_BACKGROUND = 100
# Number of features kept in the LIME and SHAP plots.
N_FEATURES_SHOW = 12
# =====================

import os, re, json, joblib, warnings
from datetime import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

import shap
from lime.lime_tabular import LimeTabularExplainer
from sklearn.cluster import KMeans

# NOTE(review): this silences every warning for the rest of the kernel
# session, including ones that could point at real problems.
warnings.filterwarnings("ignore")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ===================== naming utilities =====================

# HOG layout as stated by the original author for the extraction
# (1080x720 images, pixels per cell = (60,60), cells per block = (2,2)).
HOG_ORI = 6
CELLS_X = 18
CELLS_Y = 12
BLOCK_CX, BLOCK_CY = 2, 2
NBLOCKS_X = CELLS_X - BLOCK_CX + 1  # 17
NBLOCKS_Y = CELLS_Y - BLOCK_CY + 1  # 11

# HSV histogram: number of hue bins, then number of saturation bins.
HSV_BINS_H, HSV_BINS_S = 16, 12
# GLCM property names, indexed by the numeric suffix of the "glcm_<k>" columns.
GLCM_PROPS = ["contrast", "dissimilarity", "homogeneity", "ASM", "energy", "correlation"]

def _h_band(rx):
    return "izquierda" if rx < 1/3 else ("centro" if rx < 2/3 else "derecha")

def _v_band(ry):
    return "superior" if ry < 1/3 else ("medio" if ry < 2/3 else "inferior")

def build_hog_feature_names(orientations=HOG_ORI):
    """
    Build one readable label per HOG feature, including a coarse image region.

    Labels are emitted block row by block row, then block column, then cell
    row / cell column inside the block, then orientation bin; the original
    author states this matches skimage.hog(feature_vector=True).

    Each label holds the start angle of the orientation bin, a region tag
    (izquierda/centro/derecha + superior/medio/inferior, derived from the
    cell index relative to the last cell index) and the absolute cell
    coordinates. Overlapping blocks share cells, so labels are not unique.

    Returns a list of strings (4488 with the default layout).
    """
    x_span = max(CELLS_X - 1, 1)
    y_span = max(CELLS_Y - 1, 1)
    labels = []
    for block_row in range(NBLOCKS_Y):
        for block_col in range(NBLOCKS_X):
            for d_row in range(BLOCK_CY):
                row = block_row + d_row
                row_band = _v_band(row / y_span)
                for d_col in range(BLOCK_CX):
                    col = block_col + d_col
                    col_band = _h_band(col / x_span)
                    labels.extend(
                        f"HOG θ≈{int(round(180 / orientations * k))}° | región {col_band}-{row_band} (celda y={row}, x={col})"
                        for k in range(orientations)
                    )
    return labels  # 4488

def pretty_hsv_name(i):
    """
    Return a readable label for the i-th HSV histogram feature.

    Indices 0..HSV_BINS_H-1 are labelled as hue ranges over [0, 180);
    the next HSV_BINS_S indices are labelled as saturation ranges over
    [0, 256). Range bounds are truncated to integers.

    Any other index (negative, or past the last saturation bin) gets the
    generic label "HSV <i>" instead of a made-up range outside the valid
    hue/saturation domain.
    """
    if 0 <= i < HSV_BINS_H:
        width = 180 / HSV_BINS_H
        lo = int(i * width); hi = int((i + 1) * width)
        return f"Hue[{lo}–{hi}°]"
    j = i - HSV_BINS_H
    if 0 <= j < HSV_BINS_S:
        width = 256 / HSV_BINS_S
        lo = int(j * width); hi = int((j + 1) * width)
        return f"Saturación[{lo}–{hi}]"
    # Index outside the hue + saturation layout: do not invent a range.
    return f"HSV {i}"

def pretty_lbp_name(i):
    """Return the display label for the i-th LBP histogram bin."""
    return "LBP bin {}".format(i)

def pretty_glcm_name(i):
    """
    Return the display label for the i-th GLCM feature.

    Indices inside GLCM_PROPS map to the property name; any other index is
    shown as the raw number.
    """
    if not (0 <= i < len(GLCM_PROPS)):
        return f"GLCM {i}"
    return f"GLCM {GLCM_PROPS[i]}"

def make_friendly_names(cols):
    """
    Translate raw feature column names into readable labels.

    Recognised families are "hog_<k>", "hsv_<k>", "lbp_<k>" and "glcm_<k>"
    (k being an integer index), plus the scalar columns "edge_density" and
    "laplacian_var". Everything else is humanised: underscores become
    spaces and the first letter is capitalised.

    A HOG index outside the generated name table is labelled plain "HOG".
    A family-prefixed column whose suffix is not an integer (for example
    "glcm_contrast") is humanised as well, instead of raising ValueError.

    Returns a list of labels, one per entry of `cols`, in the same order.
    """
    hog_names = build_hog_feature_names(HOG_ORI)

    def _humanize(col):
        # Generic fallback label.
        return re.sub(r"_+", " ", col).strip().capitalize()

    def _suffix_index(col):
        # Integer after the first underscore, or None when it is not numeric.
        try:
            return int(col.split("_", 1)[1])
        except ValueError:
            return None

    # Families whose label is fully determined by the numeric suffix.
    indexed_families = (
        ("hsv_", pretty_hsv_name),
        ("lbp_", pretty_lbp_name),
        ("glcm_", pretty_glcm_name),
    )

    out = []
    for c in cols:
        if c.startswith("hog_"):
            k = _suffix_index(c)
            if k is None:
                out.append(_humanize(c))
            elif 0 <= k < len(hog_names):
                out.append(hog_names[k])
            else:
                out.append("HOG")
            continue

        for prefix, namer in indexed_families:
            if c.startswith(prefix):
                k = _suffix_index(c)
                out.append(namer(k) if k is not None else _humanize(c))
                break
        else:
            if c == "edge_density":
                out.append("Densidad de bordes")
            elif c == "laplacian_var":
                out.append("Varianza del Laplaciano (enfoque)")
            else:
                out.append(_humanize(c))
    return out

def find_image_path(filename):
    """
    Return the first existing path for `filename` under IMG_SEARCH_DIRS.

    Directories are tried in list order; None is returned when the file is
    not present in any of them.
    """
    candidates = (os.path.join(folder, filename) for folder in IMG_SEARCH_DIRS)
    return next((path for path in candidates if os.path.exists(path)), None)

# ===================== load artifacts =====================

# Fitted pipeline plus the selected feature columns (one name per line,
# blank lines skipped).
# NOTE(review): joblib.load unpickles the file, so only load trusted models.
pipe = joblib.load(MODEL_PATH)
with open(SELECTED_COLS_PATH, "r", encoding="utf-8") as f:
    selected_cols = [name for name in map(str.strip, f) if name]

# Test features; labels are optional, the "file" column is required.
df_test = pd.read_csv(TEST_CSV_PATH)
X_test = df_test.loc[:, selected_cols].copy()
y_test = None if "label" not in df_test.columns else df_test["label"]
file_names = df_test["file"].tolist()

# float32 matrix fed to the model, and labels aligned with its columns.
X_test_arr = X_test.to_numpy().astype(np.float32)
friendly_cols = make_friendly_names(selected_cols)

# ===================== predictions =====================

# Column 1 of predict_proba when the pipeline exposes it, otherwise None.
if hasattr(pipe, "predict_proba"):
    probas = pipe.predict_proba(X_test_arr)[:, 1]
else:
    probas = None
preds = pipe.predict(X_test_arr)

# ===================== LIME -> PNG =====================

def _predict_fn_lime(x):
    """
    Return class probabilities for the rows of `x`, shaped (n_rows, n_classes).

    Uses the pipeline's predict_proba when available. Otherwise the
    decision_function scores are squashed into pseudo-probabilities:
    a 1-D score vector goes through the logistic function and is returned as
    [1 - p, p]; a 2-D score matrix goes through a row-wise softmax so that
    every row is non-negative and sums to 1 (raw scores are never returned
    as if they were probabilities).
    """
    X_arr = np.asarray(x, dtype=np.float32)
    if hasattr(pipe, "predict_proba"):
        return pipe.predict_proba(X_arr)
    from scipy.special import expit, softmax
    s = np.asarray(pipe.decision_function(X_arr))
    if s.ndim == 1:
        p1 = expit(s)
        return np.vstack([1 - p1, p1]).T
    # One score per class: normalise each row into a distribution.
    return softmax(s, axis=1)

# Seeded LIME explainer; its feature statistics come from the test matrix.
# NOTE(review): the earlier cell of this notebook uses
# class_names=["sana", "defectuosa"], the opposite order -- confirm which label
# belongs to class index 0.
explainer = LimeTabularExplainer(
    training_data=X_test_arr,
    feature_names=friendly_cols,
    class_names=["danada","sana"],   # per the author: follows the dataset's 0/1 order
    discretize_continuous=True,
    mode="classification",
    random_state=42
)

exp = explainer.explain_instance(
    data_row=X_test_arr[SAMPLE_IDX],
    predict_fn=_predict_fn_lime,
    num_features=max(20, N_FEATURES_SHOW)  # ask for more, trim afterwards
)

# Take the (label, weight) pairs, sort by |weight| and keep the top
# N_FEATURES_SHOW; both tuples are empty when LIME returns nothing.
lime_items = list(exp.as_list())
lime_items = sorted(lime_items, key=lambda t: abs(t[1]), reverse=True)[:N_FEATURES_SHOW]
feat_names, feat_vals = zip(*lime_items) if lime_items else ([], [])

# Timestamp shared by every output file name written from here on.
ts = datetime.now().strftime("%Y%m%d_%H%M%S")

def _plot_lime_bar_png(names, vals, title, out_png, figsize=(9,6)):
    """
    Save a horizontal bar chart of LIME weights as a PNG file.

    names / vals are parallel sequences of labels and weights; bars for
    weights >= 0 are blue, the rest red. Nothing is drawn or written when
    `names` is empty. The figure is closed after saving.
    """
    if not names:
        return

    positions = np.arange(len(names))
    bar_colors = []
    for weight in vals:
        bar_colors.append("#1f77b4" if weight >= 0 else "#d62728")

    fig, ax = plt.subplots(figsize=figsize)
    ax.barh(positions, vals, color=bar_colors, edgecolor="none", alpha=0.95)
    ax.set_yticks(positions)
    ax.set_yticklabels(names, fontsize=10)
    # Vertical reference line at zero contribution.
    ax.axvline(0, color="#666", lw=1)
    ax.set_xlabel("Contribución LIME")
    ax.set_title(title)
    fig.tight_layout()
    fig.savefig(out_png, dpi=220)
    plt.close(fig)

# Stand-alone LIME bar chart (the helper writes nothing when there are no
# features to show).
lime_bar_path = os.path.join(OUTPUT_DIR, f"lime_bar_idx{SAMPLE_IDX}_{ts}.png")
_plot_lime_bar_png(feat_names, feat_vals, f"LIME - idx {SAMPLE_IDX}", lime_bar_path)

# Combined version with the original image, when the image can be found.
img_path = find_image_path(file_names[SAMPLE_IDX])
combined_path = None
if img_path and feat_names:
    plt.figure(figsize=(11,5))
    plt.subplot(1,2,1)
    # Context manager guarantees the image file handle is released.
    with Image.open(img_path) as img:
        plt.imshow(img)
    plt.axis("off")
    plt.title("Imagen original")

    plt.subplot(1,2,2)
    y = np.arange(len(feat_names))
    colors = ["#1f77b4" if w >= 0 else "#d62728" for w in feat_vals]
    plt.barh(y, feat_vals, color=colors)
    plt.yticks(y, feat_names, fontsize=9)
    plt.axvline(0, color="#666", lw=1)
    plt.title("LIME (top contribuciones)")
    plt.tight_layout()
    combined_path = os.path.join(OUTPUT_DIR, f"lime_image_idx{SAMPLE_IDX}_{ts}.png")
    plt.savefig(combined_path, dpi=220)
    plt.close()

# ===================== SHAP (Kernel) =====================

# Seeded generator, used later to draw the subset for the global summary.
rng = np.random.default_rng(42)
# Background size: at most 20 k-means centres.
bg_n = min(20, len(X_test_arr))
kmeans = KMeans(n_clusters=bg_n, random_state=42, n_init=10)
# The cluster centres (cast to float32) serve as the SHAP background data.
# NOTE(review): clustering runs on the raw feature array as loaded from the CSV.
centers = kmeans.fit(X_test_arr).cluster_centers_.astype(np.float32)

def predict_proba_local(X):
    """
    Return class probabilities for the rows of `X`, shaped (n_rows, n_classes).

    Uses the pipeline's predict_proba when available. Otherwise the
    decision_function scores are turned into pseudo-probabilities: a 1-D
    score vector goes through the logistic function and is returned as
    [1 - p, p]; a 2-D score matrix goes through a row-wise softmax. Raw
    scores are never returned, because the explainer below applies a logit
    link to this function's output.
    """
    X_arr = np.asarray(X, dtype=np.float32)
    if hasattr(pipe, "predict_proba"):
        return pipe.predict_proba(X_arr)
    from scipy.special import expit, softmax
    s = np.asarray(pipe.decision_function(X_arr))
    if s.ndim == 1:
        p1 = expit(s)
        return np.vstack([1 - p1, p1]).T
    # One score per class: normalise each row into a distribution.
    return softmax(s, axis=1)

# Kernel SHAP explainer over the k-means centres, with a logit link.
kernel_explainer = shap.KernelExplainer(predict_proba_local, centers, link="logit")

def _sv_pos(shap_values, class_index=1):
    if isinstance(shap_values, list):
        return np.asarray(shap_values[class_index])[0]
    arr = np.asarray(shap_values)
    if arr.ndim == 3:
        return arr[0, :, class_index]
    elif arr.ndim == 2:
        return arr[0, :]
    else:
        raise ValueError(f"Forma inesperada de shap_values: {arr.shape}")

def _sv_matrix_pos(shap_values, class_index=1):
    if isinstance(shap_values, list):
        return np.asarray(shap_values[class_index])
    arr = np.asarray(shap_values)
    if arr.ndim == 3:
        return arr[:, :, class_index]
    elif arr.ndim == 2:
        return arr
    else:
        raise ValueError(f"Forma inesperada de shap_values: {arr.shape}")

# Local explanation: SHAP values for the single row SAMPLE_IDX, class index 1.
# NOTE(review): `sv_local_pos` is not used again in the code visible in this cell.
shap_values_local = kernel_explainer.shap_values(X_test_arr[[SAMPLE_IDX]], nsamples=400)
sv_local_pos = _sv_pos(shap_values_local, class_index=1)

# Global explanation on a random subset of at most 200 rows.
subset_idx = rng.choice(len(X_test_arr), size=min(200, len(X_test_arr)), replace=False)
X_subset = X_test_arr[subset_idx]
shap_values_subset = kernel_explainer.shap_values(X_subset, nsamples=300)
sv_subset_pos = _sv_matrix_pos(shap_values_subset, class_index=1)

# SHAP plots (global): bar summary first.
plt.figure()
shap.summary_plot(sv_subset_pos, X_subset, feature_names=friendly_cols, show=False,
                  plot_type="bar", max_display=N_FEATURES_SHOW)
plt.tight_layout()
shap_summary_bar_path = os.path.join(OUTPUT_DIR, f"shap_summary_bar_{ts}.png")
plt.savefig(shap_summary_bar_path, dpi=220, bbox_inches="tight"); plt.close()

# Same data with summary_plot's default plot type (file name says "dot").
plt.figure()
shap.summary_plot(sv_subset_pos, X_subset, feature_names=friendly_cols, show=False,
                  max_display=N_FEATURES_SHOW)
plt.tight_layout()
shap_summary_dot_path = os.path.join(OUTPUT_DIR, f"shap_summary_dot_{ts}.png")
plt.savefig(shap_summary_dot_path, dpi=220, bbox_inches="tight"); plt.close()

# ===================== legend / metadata =====================

# Legend CSV mapping each raw column id to its readable label.
pd.DataFrame({"feature_id": selected_cols, "feature_name": friendly_cols}) \
  .to_csv(os.path.join(OUTPUT_DIR, f"feature_legend_{ts}.csv"), index=False, encoding="utf-8")

# Run metadata: inputs, prediction for the sample, output file paths and the
# SHAP / HOG settings used above.
# NOTE(review): "lime_bar_png" is recorded even when the bar-chart helper
# returned early without writing a file; the nsamples values are repeated here
# as literals and must be kept in sync with the calls above by hand.
meta = {
    "model_path": MODEL_PATH,
    "sample_idx": int(SAMPLE_IDX),
    "pred_sample": int(preds[SAMPLE_IDX]),
    "prob_sample": float(probas[SAMPLE_IDX]) if probas is not None else None,
    "y_true_sample": int(y_test.iloc[SAMPLE_IDX]) if y_test is not None else None,
    "image_path": img_path,
    "lime_bar_png": lime_bar_path,
    "lime_combined_png": combined_path,
    "shap_summary_bar_png": shap_summary_bar_path,
    "shap_summary_dot_png": shap_summary_dot_path,
    "bg_method": f"sklearn.KMeans_{bg_n}",
    "shap_nsamples_local": 400,
    "shap_nsamples_global": 300,
    "hog_params": {
        "orientations": HOG_ORI,
        "cells_x": CELLS_X, "cells_y": CELLS_Y,
        # The tuple is written to JSON as a two-element array.
        "cells_per_block": (BLOCK_CY, BLOCK_CX)
    }
}
with open(os.path.join(OUTPUT_DIR, f"explain_meta_{ts}.json"), "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2, ensure_ascii=False)

# Report the absolute output location.
print("\n✔️ Explicaciones generadas en:", os.path.abspath(OUTPUT_DIR))


100%|██████████| 1/1 [00:03<00:00,  3.21s/it]
100%|██████████| 94/94 [04:01<00:00,  2.57s/it]



✔️ Explicaciones generadas en: c:\Users\GMADRO04\Documents\PALAS_EOLICAS_ML\explicabilidad
