In [None]:
import shap
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import StandardScaler

# =========================
# Step 0: SentenceTransformer for embeddings
# =========================
# Instantiate the 'all-MiniLM-L6-v2' sentence-embedding model once at module
# level; text_to_combined_vector() reads this global on every call.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# =========================
# Step 1: Pick the instance to explain
# =========================
# Positional row index into focus_df (focus_df is not defined in this cell).
instance_idx = 1
instance_row = focus_df.iloc[instance_idx]
# Raw text of the selected row; this is what gets passed to the SHAP explainer.
text_instance = instance_row['Content']
# Read as a global by model_predict_shap() when return_percent=True, so any
# percentage output is relative to this one row's Total_Count.
instance_total = instance_row['Total_Count']
print("üìÖ Date of instance:", instance_row['Day'])
print("üìÑ Text:", text_instance)

# =========================
# Step 2: Convert text to combined embeddings
# =========================
def text_to_combined_vector(text_list):
    """Build one feature row per text: sentence embedding plus LDA output.

    Args:
        text_list: Texts to featurize; passed unchanged to both
            ``sentence_model.encode`` and ``vectorizer.transform``.

    Returns:
        The ``np.hstack`` of the sentence-embedding matrix and the
        ``lda_final.transform`` output, i.e. both feature sets side by side
        (embedding columns first).
    """
    dense_embeddings = sentence_model.encode(text_list)
    # Bag-of-words counts are only an intermediate input to the LDA transform.
    topic_features = lda_final.transform(vectorizer.transform(text_list))
    return np.hstack((dense_embeddings, topic_features))

# =========================
# Step 3: Fit scaler on embeddings (inputs)
# =========================
# Build the combined feature matrix for every entry of focus_df['Content'].
train_embeddings = text_to_combined_vector(focus_df['Content'].tolist())
# NOTE(review): this scaler is fitted here rather than reused from training;
# confirm it matches the input scaling the model was actually trained with.
scaler_embed = StandardScaler().fit(train_embeddings)

# =========================
# Step 4: SHAP prediction wrapper (scale inputs, inverse-scale output)
# =========================
def model_predict_shap(text_list, return_percent=False, target_dim=192, seq_len=3):
    """Predict failure counts for raw texts; used as the SHAP model function.

    Pipeline: texts -> ``text_to_combined_vector`` -> ``scaler_embed`` ->
    pad/trim to ``target_dim`` columns -> repeat each row ``seq_len`` times
    -> ``model.predict`` -> ``scaler_y.inverse_transform``.

    Args:
        text_list: Batch of texts to score (the explainer passes its
            perturbed texts here).
        return_percent: If True, return each prediction as a percentage of
            the module-level ``instance_total`` instead of the rescaled value.
        target_dim: Number of feature columns fed to the model. Defaults to
            192, the value previously hard-coded here.
        seq_len: Number of identical time steps each row is repeated into.
            Defaults to 3, the value previously hard-coded here.

    Returns:
        1-D array with one value per input text.
    """
    features = scaler_embed.transform(text_to_combined_vector(text_list))
    n_rows, n_cols = features.shape

    # Fit the feature width to what the model accepts: extra columns are
    # dropped, missing columns are zero-filled.
    # NOTE(review): this silently hides any mismatch between the features
    # built here and those the model was trained on — confirm target_dim.
    kept_cols = min(n_cols, target_dim)
    fitted = np.zeros((n_rows, target_dim), dtype=np.float32)
    fitted[:, :kept_cols] = features[:, :kept_cols]

    # Repeat every row along a new time axis -> (n_rows, seq_len, target_dim).
    # Vectorized, and keeps a well-formed 3-D shape even for an empty batch.
    X_seq = np.repeat(fitted[:, np.newaxis, :], seq_len, axis=1)

    preds_scaled = model.predict(X_seq, verbose=0).flatten()

    # Undo the target scaling so predictions are in original units.
    preds_rescaled = scaler_y.inverse_transform(preds_scaled.reshape(-1, 1)).flatten()

    if return_percent:
        # Scalar broadcasting; a zero instance_total yields inf/nan here.
        return preds_rescaled / instance_total * 100
    return preds_rescaled

# =========================
# Step 5: SHAP explainer
# =========================
# " " is passed as the first positional argument of the Text masker.
# NOTE(review): presumably this makes the masker split on single spaces —
# confirm against the shap.maskers.Text documentation.
masker = shap.maskers.Text(" ")
# The function is passed as-is (return_percent is not bound), so it runs with
# its default return_percent=False unless the caller supplies the argument.
explainer = shap.Explainer(model_predict_shap, masker)

# =========================
# Step 6: Explain selected instance
# =========================
# The single text is wrapped in a one-element list before being explained.
shap_values = explainer([text_instance])

# =========================
# Step 7: Visualize
# =========================
# Index 0 selects the explanation for the one text passed above.
shap.plots.text(shap_values[0])
