# Topic Modeling - Graphically

Topic modeling becomes difficult to visualize once multiple topics are involved. This notebook presents a way to build a graphical, conceptual model of documents in N dimensions.

For this exercise we will use D3.js to draw the graphics that illustrate topic-modeling dimensionality. To use D3.js in JupyterLab or Google Colab, we register the following pre-run hook: each time a cell runs, the hook makes the D3.js library available to that cell's output.

In [15]:
from IPython.display import  HTML

def load_d3_in_cell_output(*_args, **_kwargs):
  """Emit a <script> tag that loads D3.js v6 into the current cell's output.

  Registered below as a 'pre_run_cell' callback. Extra positional and keyword
  arguments are accepted and ignored so the function works both when called
  with no arguments and when IPython passes an execution-info object to the
  callback.
  """
  display(HTML("<script src='https://d3js.org/d3.v6.min.js'></script>"))

# Run the loader before every cell execution.
get_ipython().events.register('pre_run_cell', load_d3_in_cell_output)

In [16]:
%%html
<div id="triangle1"></div>

<script type="text/javascript">
// Draws one triangle as three circles (one per vertex) in a 300x300 SVG
// appended to div#triangle1.
// Wrapped in an IIFE so the locals do not become page-wide globals that
// collide with the identically named variables of the other cells.
(() => {
    const width = 300;
    const height = 300;
    const triangles = 1;
    const tri_size = 10;

    const svg = d3.select("div#triangle1").append("svg")
        .attr("width", width)
        .attr("height", height);

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];

    svg.selectAll("circle")
        .data(d3.range(triangles * 3)) // one datum per vertex
        .join("circle")
        .attr("cx", (d, i) => tri_x[i])
        .attr("cy", (d, i) => tri_y[i])
        .attr("r", 4)
        .style("fill", "purple")
        .style("stroke", "black")
        .style("stroke-width", 3); // reminder, this means 3 pixels
})();
</script>

In [45]:
%%html
<div id="triangle2"></div>

<script type="text/javascript">
// Tiles a tri_col x tri_row grid with identical triangles. Each triangle is
// three vertex circles inside its own translated <g>, appended to div#triangle2.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 300;
    const height = 300;
    const margin = 20;
    const triangles = 1;
    const tri_col = 10;
    const tri_row = 10;
    const tri_size = 10;

    // Map column / row indices to pixel offsets inside the margins.
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleLinear().range([margin, height - margin]).domain([0, tri_row - 1]);

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];

    const svg = d3.select("div#triangle2").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per grid cell; the cell index d is split into column and row.
    const g = svg.selectAll("g")
        .data(d3.range(tri_col * tri_row))
        .join("g")
        .attr("transform", d => `translate(${x(d % tri_col)},${y(Math.floor(d / tri_col))})`);

    g.selectAll("circle")
        .data(d3.range(triangles * 3)) // one datum per vertex
        .join("circle")
        .attr("cx", (d, i) => tri_x[i])
        .attr("cy", (d, i) => tri_y[i])
        .attr("r", 4)
        .style("fill", "purple")
        .style("stroke", "black")
        .style("stroke-width", 3); // reminder, this means 3 pixels
})();
</script>

Topics = 5

$n = 5$

$r = 3$

$\binom{n}{r} = \frac{n!}{r!\,(n - r)!} = \frac{5!}{3!\,2!} = 10$

In [116]:
%%html
<div id="emoji1"></div>

<script type="text/javascript">
// Draws every 3-topic combination of the first n emoji as an outlined
// triangle with one emoji at each vertex, laid out tri_col per row inside
// div#emoji1.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 600;
    const height = 600;
    const margin = 30;
    const n = 5;
    const soup = '🐈,🐦,🐳,🐧,🐕,🐙,🐝,🐄,🐪,🐍,🐞,🐬,🐑,🐉,🐤,🐢,🐒,🐘,🐠,🐁'.split(',');

    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 20;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleBand().range([margin, height - margin]).domain(d3.range(tri_row));

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];
    // Triangle edges as [from, to] vertex-index pairs.
    const edges = [[0, 1], [1, 2], [2, 0]];

    const svg = d3.select("div#emoji1").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, offset by half a band from the top of its row.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) =>
            `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col)) + y.bandwidth() / 2})`);

    g.selectAll("line")
        .data(edges)
        .join("line")
        .attr("x1", ([from]) => tri_x[from])
        .attr("y1", ([from]) => tri_y[from])
        .attr("x2", ([, to]) => tri_x[to])
        .attr("y2", ([, to]) => tri_y[to])
        .style("stroke", "black")
        .style("stroke-width", "1px");

    // Each group's datum is its 3-emoji set; place one emoji per vertex.
    g.selectAll("text")
        .data(d => d)
        .join("text")
        .attr("x", (d, i) => tri_x[i])
        .attr("y", (d, i) => tri_y[i])
        .style("text-anchor", "middle")
        .text(d => d);
})();
</script>

Animals = 6 

In [106]:
%%html
<div id="emoji2"></div>

<script type="text/javascript">
// Draws every 3-topic combination of the first n emoji as an outlined
// triangle with one emoji at each vertex, laid out tri_col per row inside
// div#emoji2.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 600;
    const height = 600;
    const margin = 30;
    const n = 6;
    const soup = '🐈,🐦,🐳,🐧,🐕,🐙,🐝,🐄,🐪,🐍,🐞,🐬,🐑,🐉,🐤,🐢,🐒,🐘,🐠,🐁'.split(',');

    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 20;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleBand().range([margin, height - margin]).domain(d3.range(tri_row));

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];
    // Triangle edges as [from, to] vertex-index pairs.
    const edges = [[0, 1], [1, 2], [2, 0]];

    const svg = d3.select("div#emoji2").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, offset by half a band from the top of its row.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) =>
            `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col)) + y.bandwidth() / 2})`);

    g.selectAll("line")
        .data(edges)
        .join("line")
        .attr("x1", ([from]) => tri_x[from])
        .attr("y1", ([from]) => tri_y[from])
        .attr("x2", ([, to]) => tri_x[to])
        .attr("y2", ([, to]) => tri_y[to])
        .style("stroke", "black")
        .style("stroke-width", "1px");

    // Each group's datum is its 3-emoji set; place one emoji per vertex.
    g.selectAll("text")
        .data(d => d)
        .join("text")
        .attr("x", (d, i) => tri_x[i])
        .attr("y", (d, i) => tri_y[i])
        .style("text-anchor", "middle")
        .text(d => d);
})();
</script>

Animals = 7

In [119]:
%%html
<div id="emoji3"></div>

<script type="text/javascript">
// Draws every 3-topic combination of the first n emoji as an outlined
// triangle with one emoji centred on each vertex, laid out tri_col per row
// inside div#emoji3.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 600;
    const height = 600;
    const margin = 30;
    const n = 7;
    const soup = '🐈,🐦,🐳,🐧,🐕,🐙,🐝,🐄,🐪,🐍,🐞,🐬,🐑,🐉,🐤,🐢,🐒,🐘,🐠,🐁'.split(',');

    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 20;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleBand().range([margin, height - margin]).domain(d3.range(tri_row));

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];
    // Triangle edges as [from, to] vertex-index pairs.
    const edges = [[0, 1], [1, 2], [2, 0]];

    const svg = d3.select("div#emoji3").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, offset by half a band from the top of its row.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) =>
            `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col)) + y.bandwidth() / 2})`);

    g.selectAll("line")
        .data(edges)
        .join("line")
        .attr("x1", ([from]) => tri_x[from])
        .attr("y1", ([from]) => tri_y[from])
        .attr("x2", ([, to]) => tri_x[to])
        .attr("y2", ([, to]) => tri_y[to])
        .style("stroke", "black")
        .style("stroke-width", "2px");

    // Each group's datum is its 3-emoji set; place one emoji per vertex.
    // dominant-baseline is the baseline property that applies to <text>
    // itself; alignment-baseline is specified for tspan/textPath children.
    g.selectAll("text")
        .data(d => d)
        .join("text")
        .attr("x", (d, i) => tri_x[i])
        .attr("y", (d, i) => tri_y[i])
        .style("text-anchor", "middle")
        .style("dominant-baseline", "middle")
        .text(d => d);
})();
</script>

In [48]:
%%html
<div id="triangle5"></div>

<script type="text/javascript">
// Draws every 3-topic combination of the first n letters as a triangle of
// three circles, each circle coloured by its topic, inside div#triangle5.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 300;
    const height = 300;
    const margin = 20;
    const n = 6;
    const soup = 'abcdefghijklmnopqrstuvwxyz'.split('');
    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 10;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleLinear().range([margin, height - margin]).domain([0, tri_row - 1]);

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];

    // Topic index -> [0, 1] -> Turbo colour.
    const palette = d3.interpolateTurbo;
    const color = d3.scaleLinear().range([0, 1]).domain([0, topics.length - 1]);

    const svg = d3.select("div#triangle5").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, positioned by its column and row in the grid.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) => `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col))})`);

    // Each group's datum is its 3-topic set; draw one circle per topic.
    g.selectAll("circle")
        .data(d => d)
        .join("circle")
        .attr("cx", (d, i) => tri_x[i])
        .attr("cy", (d, i) => tri_y[i])
        .attr("r", 5)
        .style("fill", d => palette(color(topics.indexOf(d))))
        .style("stroke", "black")
        .style("stroke-width", 1); // 1 pixel outline
})();
</script>

In [49]:
%%html
<div id="triangle6"></div>

<script type="text/javascript">
// Draws every 3-topic combination of the first n letters as a triangle of
// three circles, each circle coloured by its topic, inside div#triangle6.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 300;
    const height = 300;
    const margin = 20;
    const n = 7;
    const soup = 'abcdefghijklmnopqrstuvwxyz'.split('');
    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 10;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleLinear().range([margin, height - margin]).domain([0, tri_row - 1]);

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];

    // Topic index -> [0, 1] -> Turbo colour.
    const palette = d3.interpolateTurbo;
    const color = d3.scaleLinear().range([0, 1]).domain([0, topics.length - 1]);

    const svg = d3.select("div#triangle6").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, positioned by its column and row in the grid.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) => `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col))})`);

    // Each group's datum is its 3-topic set; draw one circle per topic.
    g.selectAll("circle")
        .data(d => d)
        .join("circle")
        .attr("cx", (d, i) => tri_x[i])
        .attr("cy", (d, i) => tri_y[i])
        .attr("r", 5)
        .style("fill", d => palette(color(topics.indexOf(d))))
        .style("stroke", "black")
        .style("stroke-width", 1); // 1 pixel outline
})();
</script>

Topics = 8

In [52]:
%%html
<div id="triangle7"></div>

<script type="text/javascript">
// Draws every 3-topic combination of the first n letters as a triangle of
// three circles, each circle coloured by its topic, inside div#triangle7.
// Wrapped in an IIFE so the locals do not become page-wide globals.
(() => {
    const width = 600;
    const height = 200;
    const margin = 20;
    const n = 8;
    const soup = 'abcdefghijklmnopqrstuvwxyz'.split('');
    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 10;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleLinear().range([margin, height - margin]).domain([0, tri_row - 1]);

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];

    // Topic index -> [0, 1] -> Turbo colour.
    const palette = d3.interpolateTurbo;
    const color = d3.scaleLinear().range([0, 1]).domain([0, topics.length - 1]);

    const svg = d3.select("div#triangle7").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, positioned by its column and row in the grid.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) => `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col))})`);

    // Each group's datum is its 3-topic set; draw one circle per topic.
    g.selectAll("circle")
        .data(d => d)
        .join("circle")
        .attr("cx", (d, i) => tri_x[i])
        .attr("cy", (d, i) => tri_y[i])
        .attr("r", 5)
        .style("fill", d => palette(color(topics.indexOf(d))))
        .style("stroke", "black")
        .style("stroke-width", 1); // 1 pixel outline
})();
</script>

You can see how the complexity increases with the number of topics. Use the slider to change the number of topics and see how quickly the space of combinations grows.

In [129]:
%%html
<input type="range" id="topics-slider" min="3" max="15" value="3" name="topics" oninput="graph(+this.value)">
<label for="topics-slider">Topics: </label><em id="topics" style="font-style: normal;">3</em>
<div id="emojiN"></div>

<script type="text/javascript">
/**
 * Redraws div#emojiN with every 3-topic combination of the first n emoji.
 * Each combination is an outlined triangle with one emoji centred on each
 * vertex, laid out ten per row. Also updates the #topics readout.
 *
 * Stays a global function declaration because the slider's inline
 * oninput handler calls it by name.
 *
 * @param {number} n - Number of topics to combine; the slider supplies 3-15.
 */
function graph(n) {
    // textContent: the value is plain text, there is no markup to parse.
    document.getElementById('topics').textContent = n;

    const width = 600;
    const margin = 30;
    const row_height = 42; // px per grid row; 46 rows (n = 15) is about 2000px
    const soup = '🐈,🐦,🐳,🐧,🐕,🐙,🐝,🐄,🐪,🐍,🐞,🐬,🐑,🐉,🐤,🐢,🐒,🐘,🐠,🐁'.split(',');

    const topics = soup.slice(0, n);

    // Enumerate all index triples i < j < k; this yields n! / (3! (n - 3)!) sets.
    const sets = [];
    for (let i = 0; i < topics.length - 2; i++) {
        for (let j = i + 1; j < topics.length - 1; j++) {
            for (let k = j + 1; k < topics.length; k++) {
                sets.push([topics[i], topics[j], topics[k]]);
            }
        }
    }

    const tri_col = 10;
    const tri_row = Math.ceil(sets.length / tri_col);
    const tri_size = 20;
    // Size the SVG to the rows actually drawn, so a small n does not leave
    // the triangles in the middle of a mostly empty, very tall canvas.
    const height = 2 * margin + tri_row * row_height;
    const x = d3.scaleLinear().range([margin, width - margin]).domain([0, tri_col - 1]);
    const y = d3.scaleBand().range([margin, height - margin]).domain(d3.range(tri_row));

    // Vertex coordinates, index-aligned: (tri_x[i], tri_y[i]) is vertex i.
    const tri_x = [tri_size / 2, tri_size, 0];
    const tri_y = [0, tri_size, tri_size];
    // Triangle edges as [from, to] vertex-index pairs.
    const edges = [[0, 1], [1, 2], [2, 0]];

    // Drop the previous drawing before building the new one.
    d3.select("div#emojiN").select("svg").remove();
    const svg = d3.select("div#emojiN").append("svg")
        .attr("width", width)
        .attr("height", height);

    // One group per set, offset by half a band from the top of its row.
    const g = svg.selectAll("g")
        .data(sets)
        .join("g")
        .attr("transform", (d, i) =>
            `translate(${x(i % tri_col)},${y(Math.floor(i / tri_col)) + y.bandwidth() / 2})`);

    g.selectAll("line")
        .data(edges)
        .join("line")
        .attr("x1", ([from]) => tri_x[from])
        .attr("y1", ([from]) => tri_y[from])
        .attr("x2", ([, to]) => tri_x[to])
        .attr("y2", ([, to]) => tri_y[to])
        .style("stroke", "black")
        .style("stroke-width", "2px");

    // Each group's datum is its 3-emoji set; place one emoji per vertex.
    // dominant-baseline is the baseline property that applies to <text>
    // itself; alignment-baseline is specified for tspan/textPath children.
    g.selectAll("text")
        .data(d => d)
        .join("text")
        .attr("x", (d, i) => tri_x[i])
        .attr("y", (d, i) => tri_y[i])
        .style("text-anchor", "middle")
        .style("dominant-baseline", "middle")
        .text(d => d);
}
graph(3);
</script>