In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import lightgbm as lgb

# Toy sentiment dataset: two positive (y=1) and two negative (y=0) texts.
data = pd.DataFrame({
    'text': ["I love this!", "This is terrible", "Amazing product", "Horrible experience"],
    'y': [1, 0, 1, 0]
})

# TF-IDF features: one row per text, vocabulary capped at 100 terms.
vectorizer = TfidfVectorizer(max_features=100)
X = vectorizer.fit_transform(data['text'])
y = data['y']

# Train/test split (50/50, fixed seed for reproducibility).
# Stratify on the label: with only four rows an unstratified split can put both
# examples of one class on the same side, leaving the training set with a single
# class and nothing for the binary model to learn.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42, stratify=y
)


In [None]:
# Wrap the splits as LightGBM datasets; the validation set references the
# training set so both share the same feature binning.
lgb_train = lgb.Dataset(X_train, label=y_train)
lgb_eval = lgb.Dataset(X_test, label=y_test, reference=lgb_train)

# Binary classification, evaluated with log-loss; verbose=-1 silences LightGBM logging.
params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'verbose': -1
}

# Early stopping is requested through a callback: the `early_stopping_rounds`
# keyword of lgb.train was deprecated in LightGBM 3.3 and removed in 4.0, where
# passing it raises a TypeError. The callback form works on both old and new versions.
model = lgb.train(
    params,
    lgb_train,
    num_boost_round=50,
    valid_sets=[lgb_eval],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)


In [None]:
import shap
import matplotlib.pyplot as plt

# Build the tree explainer for the trained booster.
explainer = shap.TreeExplainer(model)

# X_test is the sparse TF-IDF matrix; densify it before explaining, because
# shap's LightGBM path may not handle scipy sparse input. The matrix here is tiny.
X_test_dense = X_test.toarray()
shap_values = explainer.shap_values(X_test_dense)

# Depending on the shap version, a binary model yields either a list
# [class 0, class 1], an (n_samples, n_features, 2) array, or a single
# (n_samples, n_features) array. Normalise to the positive-class values and
# the matching base value so the plot below always describes P(y=1).
if isinstance(shap_values, list):
    positive_shap = shap_values[1]
    base_value = explainer.expected_value[1]
elif getattr(shap_values, "ndim", 2) == 3:
    positive_shap = shap_values[:, :, 1]
    base_value = explainer.expected_value[1]
else:
    positive_shap = shap_values
    base_value = explainer.expected_value

# Visualise a single sample (row `sample_idx` of the test split).
sample_idx = 0
shap.initjs()
shap.force_plot(
    base_value,
    positive_shap[sample_idx],
    feature_names=vectorizer.get_feature_names_out(),
)


In [None]:
from lime.lime_text import LimeTextExplainer
import numpy as np

# LIME explainer for text input.
# class_names are display labels, listed in the order of the classifier's
# probability columns (index 0 -> 'No', index 1 -> 'Yes').
# random_state pins LIME's random perturbation sampling so the explanation is
# reproducible across runs, matching the fixed seed used for the data split.
# NOTE: this rebinds `explainer`, which the SHAP cell also uses for its TreeExplainer.
class_names = ['No', 'Yes']
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

# Prediction function handed to LIME.
def predict_proba(texts):
    """Return class probabilities for raw texts, in the layout LIME expects.

    Args:
        texts: iterable of raw strings (LIME passes perturbed copies of the
            sample being explained).

    Returns:
        Array of shape (n_samples, 2): column 0 is P(y=0), column 1 is P(y=1).
    """
    X_vect = vectorizer.transform(texts)
    # For the binary objective the booster returns only P(y=1), one value per row.
    p_pos = np.asarray(model.predict(X_vect, raw_score=False)).reshape(-1)
    # LIME indexes the column of the label it explains (label 1 by default),
    # so a single-column result would raise IndexError; emit both classes.
    return np.column_stack([1.0 - p_pos, p_pos])

# Explain the first sample of the dataset, keeping the 5 most influential words,
# then render the explanation (with the highlighted text) inline in the notebook.
i = 0
sample_text = data['text'].iloc[i]
exp = explainer.explain_instance(
    sample_text,
    predict_proba,
    num_features=5,
)
exp.show_in_notebook(text=True)
