In [3]:
import pandas as pd

from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

In [4]:
# Fetch both predefined splits of the newsgroups corpus, train first and then test
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split) for split in ('train', 'test')
)

In [5]:
# The vectorizer is fitted on the training texts only; the test texts are
# merely transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer()
vectors_train, vectors_test = (
    vectorizer.fit_transform(newsgroups_train.data),  # evaluated first: fits, then encodes train
    vectorizer.transform(newsgroups_test.data),       # evaluated second: reuses the fitted state
)

In [6]:
# Train a multinomial Naive Bayes classifier on the training vectors
# (fit() hands back the estimator, so construction and training are chained).
clf = MultinomialNB().fit(vectors_train, newsgroups_train.target)

# Reference labels of the test split and the classifier's predictions for it
expected = newsgroups_test.target
predicted = clf.predict(vectors_test)

In [7]:
def name_targets(target_names, targets):
    """Translate target indices into their corresponding names.

    Parameters
    ----------
    target_names : indexable
        Container of names, looked up as ``target_names[t]``.
    targets : iterable
        Indices (keys) to look up in ``target_names``.

    Returns
    -------
    list
        One name per element of ``targets``, in the same order.
    """
    labels = []
    for index in targets:
        labels.append(target_names[index])
    return labels

In [8]:
# Maximum number of characters of each text that is sent to Facets Dive;
# longer texts are cut to this size to reduce the volume of transferred data.
MAX_TEXT_CHARS = 1000

# One row per test document: true label, predicted label, original text length
# and the (possibly shortened) text itself.
df = pd.DataFrame({
    'expected': name_targets(newsgroups_test.target_names, expected),
    'predicted': name_targets(newsgroups_test.target_names, predicted),
    # shorten the texts; the '...' marker is appended only to texts that were
    # actually cut, so short texts are passed through unchanged
    'data': [
        text if len(text) <= MAX_TEXT_CHARS else text[:MAX_TEXT_CHARS] + '...'
        for text in newsgroups_test.data
    ],
    # add text lengths (of the full, unshortened texts)
    'length': [len(text) for text in newsgroups_test.data],
}, columns=['expected', 'predicted', 'length', 'data'])

In [9]:
# preview the first five of the records that will be visualized
df.head(n=5)

Unnamed: 0,expected,predicted,length,data
0,rec.autos,rec.autos,695,From: v064mb9k@ubvmsd.cc.buffalo.edu (NEIL B. ...
1,comp.windows.x,sci.crypt,939,From: Rick Miller <rick@ee.uwm.edu>\nSubject: ...
2,alt.atheism,alt.atheism,453,From: mathew <mathew@mantis.co.uk>\nSubject: R...
3,talk.politics.mideast,talk.politics.mideast,5239,From: bakken@cs.arizona.edu (Dave Bakken)\nSub...
4,talk.religion.misc,alt.atheism,1007,From: livesey@solntze.wpd.sgi.com (Jon Livesey...


In [11]:
# `display` and `HTML` are imported from the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import HTML, display

# HTML snippet that embeds a Facets Dive element in the notebook output:
#   * imports the Facets component from the /nbextensions/facets-dist path,
#   * creates a <facets-dive> element (id "fd", height 600),
#   * assigns the records (substituted for {jsonstr}) to the element,
#   * facets vertically by 'predicted' and horizontally by 'expected'
#     (8 buckets each) and colors the items by 'expected'.
# The doubled braces in the <style> rule are str.format escapes for literal { }.
HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html">
        <facets-dive id="fd" height="600"></facets-dive>
        <script>
          var data = {jsonstr};
          var fd = document.querySelector("#fd");
          fd.data = data;
          fd['verticalFacet'] = 'predicted';
          fd['verticalBuckets'] = 8;
          fd['horizontalFacet'] = 'expected';
          fd['horizontalBuckets'] = 8;
          fd['colorBy'] = 'expected';
        </script>
        <style>.container {{ width:100% !important; }}</style>"""

# Serialize the frame as a JSON array with one object per row and inline it
# into the template as the JavaScript value of `data`.
html = HTML_TEMPLATE.format(jsonstr=df.to_json(orient='records'))
display(HTML(html))