In [None]:
from __future__ import print_function

import lime
import numpy as np
import pandas as pd
import sklearn
import sklearn.ensemble
import sklearn.metrics
from lime import lime_text
from lime.lime_text import LimeTextExplainer
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

%matplotlib inline

## Fetch training and testing data

In [None]:

# Restrict the corpus to two newsgroups; class_names are the short display
# labels later handed to the explainer (presumably listed in the same order as
# the numeric targets — verify against newsgroups_train.target_names).
categories = ['comp.graphics', 'sci.electronics']
class_names = ['graphics', 'electronics']

# Load the train and test splits for the selected categories.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split, categories=categories)
    for split in ('train', 'test')
)

## Build a TF-IDF vectorizer and vectorize the training and test text

In [None]:
# Fit the TF-IDF vocabulary and weights on the training text only, then reuse
# the fitted vectorizer on the test text so both splits share one feature space.
# TfidfVectorizer is imported by name at the top of the notebook instead of
# being reached through `sklearn.feature_extraction.text`, a submodule that
# `import sklearn` alone does not guarantee to have loaded.
vectorizer = TfidfVectorizer(lowercase=False, stop_words='english')
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)

## Create and train a classifier model

In [None]:
# Random forest with 500 trees. random_state is pinned so that a
# Restart & Run All rebuilds the same forest and therefore yields the same
# predictions; without it every run trains a different model.
model = sklearn.ensemble.RandomForestClassifier(n_estimators=500, random_state=42)
model.fit(train_vectors, newsgroups_train.target)

## Generate predictions for test samples

In [None]:
# Predict labels for the vectorized test set and report the binary F1 score
# against the true test targets, rounded to two decimals.
pred = model.predict(test_vectors)
test_f1 = sklearn.metrics.f1_score(newsgroups_test.target, pred, average='binary')
print(f"F1 score: {test_f1:.2f}")

## Create a model pipeline and an explainer object

In [None]:
# Chain the already-fitted vectorizer and classifier so that raw text can be
# passed directly to the pipeline's predict_proba.
model_pipeline = make_pipeline(vectorizer, model)

# Text explainer labelled with the human-readable class names. random_state
# seeds the explainer's random sampling so repeated runs of the notebook
# produce the same explanation for the same document.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)

## Generate an explanation for one prediction and save it as HTML

In [None]:
# Position (within the test split) of the document to explain.
idx = 270

# Explain the pipeline's class probabilities for that document, asking for
# six features in the explanation.
exp = explainer.explain_instance(
    newsgroups_test.data[idx],
    model_pipeline.predict_proba,
    num_features=6,
)

# Render the explanation as an HTML page and save it as UTF-8 text.
page = exp.as_html()
with open('text_explanation.html', 'w', encoding="utf-8") as html_file:
    html_file.write(page)