In [10]:
from IPython.core.display import HTML, Javascript
from string import Template
import pandas as pd
import numpy as np
import json

from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import widgetbox
from bokeh.models import CustomJS, Slider
from bokeh.models.sources import ColumnDataSource
from bokeh.models.ranges import DataRange1d
from bokeh.models.glyphs import VBar
from bokeh.models import FactorRange
from bokeh.models import Title
from bokeh.layouts import gridplot 

output_notebook()

Load the d3 library and expose it on the global `window` object.

In [11]:
%%javascript

// Point the notebook's RequireJS loader at the d3 v5 build on the d3js.org CDN.
// NOTE(review): the original author left an open question about a local copy
// not working, which is presumably why the CDN is used — confirm.
require.config({ paths: { d3: "https://d3js.org/d3.v5.min" } });

// Once the module has loaded, publish it as the global `d3` used by later cells.
require(["d3"], (d3Module) => {
    window.d3 = d3Module;
});

<IPython.core.display.Javascript object>

### Embed a D3 vis

#### Load data

In [12]:
# Load the JSON file; later cells iterate `data` as a sequence of records that
# carry `alpha`, `beta` and `doc_topics`.
# The encoding is stated explicitly (JSON is UTF-8) so the read does not depend
# on the platform's locale default.
with open('../data/lee-params.json', encoding='utf-8') as f:
    data = json.load(f)

#### Load the vis code with the data

In [13]:
# HTML fragment that pulls in the stylesheets and scripts, creates the SVG
# container and renders the component with the data substituted for `$data`.
#
# The script's local `const` lives inside a `{ ... }` block: re-running this cell
# injects the script again, and a top-level `const` would then be redeclared in
# the page's global scope and raise a SyntaxError. `window.visDocTopics` is
# deliberately left global because the slider callback in a later cell uses it.
load_text = Template('''
    <link rel='stylesheet' href='../src/css/core-notebook.css'>
    <link rel='stylesheet' href='../src/css/comp-params.css'>

    <svg class='vis doc-topics' width='100%' height='400px'></svg>

    <script src='../src/lib/lodash.min.js'></script>
    <script src='../src/js/core.js'></script>
    <script src='../src/js/comp-params.js'></script>

    <script>
    window.visDocTopics = pv.vis.compParams()
        .values(d => d.doc_topics)
        .visTitle('Document Topics')
        .minValue(0.1)
        .termLabels(['documents', 'topics'])
        .minProbLabel('Min Topic Probability');

    {
        // Block scope keeps `rect` out of the global scope (safe to re-run).
        const rect = pv.getContentRect(document.querySelector('.doc-topics'));
        visDocTopics.width(rect[0]).height(rect[1]);

        d3.select('.doc-topics')
            .datum($data)
            .call(visDocTopics);
    }
    </script>
''')

# The last expression of the cell is the rendered HTML output.
HTML(load_text.substitute({'data': json.dumps(data)}))

#### Add a slider to control the threshold

In [14]:
# Browser-side callback: push the slider value into the D3 component as its new
# minimum value, invalidate it, and redraw it on the existing SVG.
update_threshold = CustomJS(code='''
    visDocTopics.minValue(cb_obj.value).invalidate();
    d3.select('.doc-topics').call(visDocTopics);
''')

# Attach the callback with `js_on_change` rather than the `callback=` constructor
# argument, which is deprecated and no longer accepted by Bokeh 2.0+.
# `cb_obj` is still the slider inside the callback, so `cb_obj.value` is unchanged.
slider = Slider(start=0, end=1, value=0.1, step=0.01, title="threshold")
slider.js_on_change('value', update_threshold)
show(widgetbox(slider))

In [15]:
from collections import Counter
from itertools import chain

def count_topics(data, threshold):
    """Tally documents by how many of their topics reach `threshold`.

    Parameters
    ----------
    data : mapping
        Must provide ``'alpha'``, ``'beta'`` and ``'doc_topics'``, where
        ``'doc_topics'`` is a sequence of per-document probability sequences.
    threshold : number
        A topic is counted for a document when its probability is >= this value.

    Returns
    -------
    list of tuple
        One ``(alpha, beta, num_topics, count)`` tuple for every ``num_topics``
        from 1 up to the length of the first document's probability sequence.
        ``count`` is the number of documents with exactly that many topics at
        or above the threshold. Documents with zero such topics are not
        reported. An empty ``'doc_topics'`` yields an empty list.
    """
    docs = data['doc_topics']
    if not docs:
        # Nothing to count, and no first document to take the topic count from.
        return []

    topics_per_doc = (sum(p >= threshold for p in probs) for probs in docs)
    tally = Counter(topics_per_doc)
    # The number of topics is taken from the first document.
    return [(data['alpha'], data['beta'], k, tally[k]) for k in range(1, len(docs[0]) + 1)]

In [16]:
# Flatten the per-record counts (threshold 0.1) into one list of rows.
# alpha, beta and num_topics are converted to strings: the plotting cell filters
# on string values and uses string factors for the x axis.
count_data = [
    (str(alpha), str(beta), str(num_topics), count)
    for record in data
    for alpha, beta, num_topics, count in count_topics(record, 0.1)
]
df = pd.DataFrame(count_data, columns=['alpha', 'beta', 'num_topics', 'count'])
df.head()

Unnamed: 0,alpha,beta,num_topics,count
0,0.01,0.001,1,188
1,0.01,0.001,2,99
2,0.01,0.001,3,11
3,0.01,0.001,4,1
4,0.01,0.001,5,1


In [17]:
def plot(df, alphas, betas, topics):
    """Show a grid of bar charts, one chart per (alpha, beta) pair.

    Columns follow `alphas` in the given order; rows follow `betas` in reverse
    order. Only the bottom row gets an x axis and only the first column gets a
    y axis. The top row is titled with its alpha value and the last column with
    its beta value. `topics` supplies the categorical x-axis factors.
    The grid is displayed with `show`; nothing is returned.
    """
    def plot_one(alpha, beta, xaxis, yaxis, xlabel, ylabel):
        """Build the bar chart for one (alpha, beta) pair."""
        selected = df[(df['alpha'] == alpha) & (df['beta'] == beta)]
        source = ColumnDataSource(selected)

        # Every chart uses the same y range (0 .. max count over the whole frame).
        # Charts that carry an axis get 30 extra pixels on that side.
        fig = figure(
            x_range=FactorRange(factors=topics),
            y_range=DataRange1d(start=0, end=df['count'].max()),
            plot_width=200 + (30 if yaxis else 0),
            plot_height=200 + (30 if xaxis else 0),
            tools='hover',
            x_axis_location='below' if xaxis else None,
            y_axis_location='left' if yaxis else None,
            tooltips=[
                ('α', '@alpha'),
                ('β', '@beta'),
                ('# topics', '@num_topics'),
                ('# documents', '@count'),
            ],
        )
        fig.xgrid.visible = False

        if xlabel:
            fig.add_layout(Title(text=xlabel, align='center'), 'above')
        if ylabel:
            fig.add_layout(Title(text=ylabel, align='center'), 'right')

        bars = VBar(
            x='num_topics',
            top='count',
            width=0.95,
            fill_color='steelblue',
            line_color=None,
            fill_alpha=0.7,
        )
        fig.add_glyph(source, bars)

        return fig

    columns = alphas
    rows = list(reversed(betas))

    cells = []
    for beta in rows:
        row_plots = []
        for alpha in columns:
            row_plots.append(plot_one(
                alpha,
                beta,
                beta == rows[-1],       # x axis on the bottom row only
                alpha == columns[0],    # y axis on the first column only
                'α=' + str(alpha) if beta == rows[0] else None,
                'β=' + str(beta) if alpha == columns[-1] else None,
            ))
        cells.append(row_plots)

    show(gridplot(cells, toolbar_location=None))

# Values to lay out in the grid, written as strings because `df` stores
# alpha, beta and num_topics as strings.
alphas = [
    '0.01',
    '0.1',
    '1',
    '10',
]
betas = [
    '0.001',
    '0.01',
    '0.1',
    '1',
]
topics = [
    '1',
    '2',
    '3',
    '4',
    '5',
]
plot(df, alphas=alphas, betas=betas, topics=topics)