# Network visualization with D3.js
---
One quick way to identify topics in tweets is to analyze the use of keywords or hashtags systematically. A common approach is to find pairs of hashtags (or words) that are often mentioned together in the same tweets. Visualizing the result such that pairs that often appear together are drawn close to each other gives us a visual way to explore a topic map.

In this notebook, you'll learn how to quickly visualize networks using D3.js.

You can find a good introduction to the force layout with D3.js here: <a href="https://www.d3indepth.com/force-layout/" target="_blank">Force layout on d3indepth.com</a>

Find a collection of examples of what you can do with D3.js here: <a href="https://observablehq.com/@d3/gallery" target="_blank">D3.js gallery</a>

## 1. Create a view for hashtag pairs

### Explode hashtag array and create a view

In [0]:
create or replace view hashtags as
  -- One row per (tweet, hashtag): the hashtags array of each tweet is exploded,
  -- and every hashtag is lower-cased so that different spellings compare equal.
  with exploded as (
    select id
          ,explode(hashtags) as hashtag
          ,created_at
    from tweets
  )
  select id
        ,lower(hashtag) as hashtag
        ,created_at
  from exploded

### Create a view for hashtag pairs

In [0]:
create or replace view hashtag_pairs as
-- One row per tweet and unordered pair of different hashtags used in that tweet.
-- Pairs are stored in ascending order (h1 < h2), so (a, b) and (b, a) are the same row.
-- DISTINCT is still required: a hashtag repeated inside one tweet would otherwise
-- produce the same pair several times.
select distinct
    first_tag.id
   ,first_tag.hashtag  as h1
   ,second_tag.hashtag as h2
from
   -- year() returns an integer, so it is compared against a numeric literal
   (select id, hashtag from hashtags where year(created_at) = 2020) first_tag
inner join
   (select id, hashtag from hashtags where year(created_at) = 2020) second_tag
on  first_tag.id = second_tag.id
-- Joining on "<" instead of "<>" emits every pair once and already ordered,
-- which halves the join output and makes swapping the columns afterwards unnecessary.
and first_tag.hashtag < second_tag.hashtag

In [0]:
-- Preview the contents of the hashtag_pairs view (columns: id, h1, h2)
select * from hashtag_pairs

## 2. Export the nodes with SQL

### Create a view to extract the nodes

In [0]:
create or replace view nodes as
-- One node per hashtag; `size` is the number of rows (occurrences) of that
-- hashtag in the hashtags view for the selected year.
select hashtag as `id`
      ,count(1) as `size`
from hashtags
-- year() returns an integer, so it is compared against a numeric literal
where year(created_at) = 2020
group by hashtag
-- Keep only hashtags that occurred more than 10 times
having count(1) > 10

### Convert nodes to JSON

In [0]:
%python
import json

# Read the `nodes` view, let Spark serialize every row to a JSON string and
# parse those strings into plain Python dicts, collected in the `nodes` list.
df = spark.sql('select id, size from nodes')
nodes = [json.loads(row_json) for row_json in df.toJSON().collect()]
# Uncomment to inspect the result:
#print(nodes)

## 3. Export the edges with SQL

### Create a view to extract the edges

In [0]:
create or replace view edges as
-- One edge per hashtag pair; `weight` is the number of hashtag_pairs rows for that pair.
-- The two semi joins keep only pairs whose hashtags both exist in the nodes view.
select
      pairs.h1 as `source`
     ,pairs.h2 as `target`
     ,count(*) as `weight`
from hashtag_pairs pairs
left semi join nodes source_node on pairs.h1 = source_node.id
left semi join nodes target_node on pairs.h2 = target_node.id
group by pairs.h1, pairs.h2
order by count(*) desc

### Convert edges to JSON

In [0]:
%python
import json

# Read the `edges` view, let Spark serialize every row to a JSON string and
# parse those strings into plain Python dicts, collected in the `edges` list.
df = spark.sql('select source, target, weight from edges')
edges = [json.loads(row_json) for row_json in df.toJSON().collect()]
# Uncomment to inspect the result:
#print(edges)

## 4. Create a force layout visualization with D3.js (SVG version)
---
You will need to play around with the parameters until you find a good fit for your data:<br><br>


- `.distance(function(d) { return 2 * (maxWeight  - d.weight); })` - This function sets the target length of each edge, i.e. the distance between the two linked nodes (here: the heavier the edge, the shorter it is)
- `.strength(function(d) { return 1 * (d.weight / maxWeight); })` - This function determines the elasticity of the connection - how easily does it stretch?
- `.force("charge", d3.forceManyBody().strength(-250))` - This function sets the strength with which all points attract (positive values) or repel (negative values) each other
- `.force("x", d3.forceX(width / 2).strength(.01))` - This function (and `forceY` for y respectively) determines the location of a center point and the strength with which each point is attracted to it

In [0]:
%python
import json


def _to_js_literal(value):
    """Serialize *value* as JSON that is safe to inline in a <script> element."""
    # json.dumps produces a valid JavaScript literal (double-quoted strings,
    # null, true/false), which Python's repr() does not guarantee. Escaping
    # "</" prevents a string such as "</script>" from closing the script
    # element early; "\/" is a legal escape for "/" in both JSON and JavaScript.
    return json.dumps(value).replace("</", "<\\/")


# HTML page that renders `nodes` and `edges` as an SVG force layout with D3.js v4.
# The two placeholders in the template are filled with the JSON-encoded lists.
html = """
<!DOCTYPE html>
<meta charset="utf-8" />
<style>
  .links line {
    stroke: #999;
    stroke-opacity: 0.6;
  }

  .nodes circle {
    stroke: #fff;
    stroke-width: 1.5px;
  }

  text {
    font-family: sans-serif;
    font-size: 10px;
  }
</style>
<svg width="2000" height="1000"></svg>

<script src="https://d3js.org/d3.v4.min.js"></script>
<script>
  var svg = d3.select("svg"),
    width = +svg.attr("width"),
    height = +svg.attr("height");

  var color = d3.scaleOrdinal(d3.schemeCategory20);

  // Nodes and edges are injected by Python as JSON literals
  var data = { nodes: %s, edges: %s };

  // Get the maximum weight for an edge
  var maxWeight = 0;
  for (var i = 0; i < data.edges.length; i++) {
    if (maxWeight < data.edges[i].weight)
      maxWeight = data.edges[i].weight;
  }

  // Tune the distance, strength and charge values to fit your data
  var simulation = d3
    .forceSimulation()
    .force("link", d3.forceLink().id(function(d) { return d.id; })
             .distance(function(d) { return 2 * (maxWeight  - d.weight); })
             .strength(function(d) { return 1 * (d.weight / maxWeight); })
          )
    .force("charge", d3.forceManyBody().strength(-250))
    .force("center", d3.forceCenter(width / 2, height / 2))
    .force("x", d3.forceX(width / 2).strength(.01))
    .force("y", d3.forceY(height / 2).strength(.01));
    // Optional: chain this before the semicolon above to keep circles from overlapping
    //.force('collision', d3.forceCollide().radius(function(d) { return Math.min(30, d.size) }))

  // One line per edge; the stroke width follows the weight, capped at 40
  var link = svg
    .append("g")
    .attr("class", "links")
    .selectAll("line")
    .data(data.edges)
    .enter()
    .append("line")
    .attr("stroke-width", function(d) {
        return Math.min(d.weight, 40);
    });

  // One group per node, holding the circle, its label and a tooltip title
  var node = svg
    .append("g")
    .attr("class", "nodes")
    .selectAll("g")
    .data(data.nodes)
    .enter()
    .append("g");

  // The radius follows the node size, capped at 30
  var circles = node
    .append("circle")
    .attr("r", function(d) {
      return Math.min(30, d.size);
    })
    .attr("fill", function(d) {
      return color(1);
    })
    .call(
      d3
        .drag()
        .on("start", dragstarted)
        .on("drag", dragged)
        .on("end", dragended)
    );

  var labels = node
    .append("text")
    .text(function(d) {
      return d.id;
    })
    .attr("x", 6)
    .attr("y", 3);

  node.append("title").text(function(d) {
    return d.id;
  });

  simulation.nodes(data.nodes).on("tick", ticked);

  simulation.force("link").links(data.edges);

  // Move lines and node groups to the positions computed by the simulation
  function ticked() {
    link
      .attr("x1", function(d) {
        return d.source.x;
      })
      .attr("y1", function(d) {
        return d.source.y;
      })
      .attr("x2", function(d) {
        return d.target.x;
      })
      .attr("y2", function(d) {
        return d.target.y;
      });

    node.attr("transform", function(d) {
      return "translate(" + d.x + "," + d.y + ")";
    });
  }

  // Reheat the simulation and pin the dragged node (fx/fy) to its position
  function dragstarted(d) {
    if (!d3.event.active) simulation.alphaTarget(0.3).restart();
    d.fx = d.x;
    d.fy = d.y;
  }

  function dragged(d) {
    d.fx = d3.event.x;
    d.fy = d3.event.y;
  }

  // Let the simulation cool down again and release the node
  function dragended(d) {
    if (!d3.event.active) simulation.alphaTarget(0);
    d.fx = null;
    d.fy = null;
  }
</script>

""" % (_to_js_literal(nodes), _to_js_literal(edges))

#print(html)
displayHTML(html)

## 5. Create a force layout visualization with D3.js (CANVAS version with zoom)
---
You will need to play around with the parameters until you find a good fit for your data:<br><br>


- `.distance(function(d) { return 2 * (maxWeight  - d.weight); })` - This function sets the target length of each edge, i.e. the distance between the two linked nodes (here: the heavier the edge, the shorter it is)
- `.strength(function(d) { return 1 * (d.weight / maxWeight); })` - This function determines the elasticity of the connection - how easily does it stretch?
- `.force("charge", d3.forceManyBody().strength(-250))` - This function sets the strength with which all points attract (positive values) or repel (negative values) each other
- `.force("x", d3.forceX(width / 2).strength(.01))` - This function (and `forceY` for y respectively) determines the location of a center point and the strength with which each point is attracted to it

In [0]:
%python
import json


def _to_js_literal(value):
    """Serialize *value* as JSON that is safe to inline in a <script> element."""
    # json.dumps produces a valid JavaScript literal (double-quoted strings,
    # null, true/false), which Python's repr() does not guarantee. Escaping
    # "</" prevents a string such as "</script>" from closing the script
    # element early; "\/" is a legal escape for "/" in both JSON and JavaScript.
    return json.dumps(value).replace("</", "<\\/")


# HTML page that renders `nodes` and `edges` on a canvas as a zoomable,
# draggable force layout (D3.js v4) with PNG/JPG export buttons.
# The two placeholders in the template are filled with the JSON-encoded lists.
html = """
  <html>
  <head>
  <meta charset="utf-8" />
  <!-- The d3 v4 bundle already contains d3-force, so no separate script is needed -->
  <script src="https://d3js.org/d3.v4.min.js"></script>

</head>
  <body>

    <div id="graphDiv"></div>

<hr/>

    <button onclick="download('png')">
      Download PNG
    </button>

    <button onclick="download('jpg')">
      Download JPG
    </button>


 <script>
      // Nodes and links are injected by Python as JSON literals
      var data = { nodes: %s, links: %s };


      // Get the maximum weight for an edge
      var maxWeight = 0;
      for (var i = 0; i < data.links.length; i++) {
        if (maxWeight < data.links[i].weight) maxWeight = data.links[i].weight;
      }

      var height = 1000;
      var width = 2000;


      // Append the canvas to the HTML document
      var graphCanvas = d3
        .select("#graphDiv")
        .append("canvas")
        .attr("width", width + "px")
        .attr("height", height + "px")
        .node();

      var context = graphCanvas.getContext("2d");

      var div = d3
        .select("body")
        .append("div")
        .attr("class", "tooltip")
        .style("opacity", 0);

      // Tune the distance, strength and charge values to fit your data
      var simulation = d3
        .forceSimulation()
        .force(
          "link",
          d3
            .forceLink()
            .id(function(d) {
              return d.id;
            })
            .distance(function(d) {
              return 2 * (maxWeight - d.weight);
            })
            .strength(function(d) {
              return 1 * (d.weight / maxWeight);
            })
        )
        .force("charge", d3.forceManyBody().strength(-250))
        .force("center", d3.forceCenter(width / 2, height / 2))
        .force("x", d3.forceX(width / 2).strength(0.01))
        .force("y", d3.forceY(height / 2).strength(0.01))
        .alphaTarget(0)
        .alphaDecay(0.05);

      // Current zoom/pan state; applied to the canvas on every redraw
      var transform = d3.zoomIdentity;

      initGraph(data);

      function initGraph(tempData) {
        // Store the new zoom/pan transform and redraw
        function zoomed() {
          transform = d3.event.transform;
          simulationUpdate();
        }

        d3.select(graphCanvas)
          .call(
            d3
              .drag()
              .subject(dragsubject)
              .on("start", dragstarted)
              .on("drag", dragged)
              .on("end", dragended)
          )
          .call(
            d3
              .zoom()
              .scaleExtent([1 / 10, 8])
              .on("zoom", zoomed)
          );

        // Find the node under the pointer. The pointer position is converted
        // from screen to simulation coordinates before the hit test.
        function dragsubject() {
          var i,
            node,
            radius,
            x = transform.invertX(d3.event.x),
            y = transform.invertY(d3.event.y),
            dx,
            dy;
          for (i = tempData.nodes.length - 1; i >= 0; --i) {
            node = tempData.nodes[i];
            dx = x - node.x;
            dy = y - node.y;

            radius = Math.min(30, node.size);
            if (dx * dx + dy * dy < radius * radius) {
              // The subject position is handed to d3-drag in screen coordinates;
              // the drag handlers below invert the event position back into
              // simulation coordinates before pinning the node.
              node.x = transform.applyX(node.x);
              node.y = transform.applyY(node.y);

              return node;
            }
          }
        }

        // Reheat the simulation and pin the dragged node (fx/fy)
        function dragstarted() {
          if (!d3.event.active) simulation.alphaTarget(0.3).restart();
          d3.event.subject.fx = transform.invertX(d3.event.x);
          d3.event.subject.fy = transform.invertY(d3.event.y);
        }

        function dragged() {
          d3.event.subject.fx = transform.invertX(d3.event.x);
          d3.event.subject.fy = transform.invertY(d3.event.y);
        }

        // Let the simulation cool down again and release the node
        function dragended() {
          if (!d3.event.active) simulation.alphaTarget(0);
          d3.event.subject.fx = null;
          d3.event.subject.fy = null;
        }

        simulation.nodes(tempData.nodes).on("tick", simulationUpdate);

        simulation.force("link").links(tempData.links);

        // Redraw the whole graph with the current zoom/pan transform applied
        function simulationUpdate() {
          context.save();

          context.clearRect(0, 0, width, height);
          context.translate(transform.x, transform.y);
          context.scale(transform.k, transform.k);

          // Draw the links
          tempData.links.forEach(function(d) {
            context.beginPath();
            context.lineWidth = Math.min(d.weight, 40);
            context.strokeStyle = "rgba(0, 158, 227, .3)";
            context.moveTo(d.source.x, d.source.y);
            context.lineTo(d.target.x, d.target.y);
            context.stroke();
          });

          // Draw the nodes
          tempData.nodes.forEach(function(d, i) {
            context.beginPath();
            context.arc(d.x, d.y, Math.min(30, d.size), 0, 2 * Math.PI, true);
            context.fillStyle = "rgba(0, 158, 227, 0.8)";
            context.fill();
            context.fillStyle = "rgba(0, 0, 0, 1)";
            context.fillText(d.id, d.x + 10, d.y);
          });

          context.restore();

        }
      }

    // Open the current canvas content as an image ("png" or "jpg") in a new window
    function download(type) {
        var imgUrl;

        if (type === "jpg") {
          // JPEG has no alpha channel: copy the transparent graph onto a white
          // background first, otherwise the black labels end up on black
          var flattened = document.createElement("canvas");
          flattened.width = graphCanvas.width;
          flattened.height = graphCanvas.height;
          var flatContext = flattened.getContext("2d");
          flatContext.fillStyle = "#fff";
          flatContext.fillRect(0, 0, flattened.width, flattened.height);
          flatContext.drawImage(graphCanvas, 0, 0);
          imgUrl = flattened.toDataURL("image/jpeg");
        } else {
          imgUrl = graphCanvas.toDataURL("image/png");
        }

        // window.open() returns null when the popup is blocked
        var imageWindow = window.open();
        if (imageWindow) {
          imageWindow.document.write('<img src="' + imgUrl + '" />');
        }
      }
    </script>

  </body>
</html>
""" % (_to_js_literal(nodes), _to_js_literal(edges))

#print(html)
displayHTML(html)

**NOTE**: Using a canvas element has two advantages:<br><br>

1. It is more performant than SVG and will allow you to visualize more nodes and edges before your browser freezes the simulation
2. You can export to JPG or PNG easily. Use the two buttons to open the canvas as images. Note that you need to right-click -> "Open in new tab" or copy the image in order to save it to disk.