# Putting it all together

Let's combine a few of the concepts covered so far into one small project. First, we'll get some (simulated) Twitter data. Then we'll parse it using regular expressions. Next, we'll introduce networkx to create a graph. Finally, we'll take a look at the graph!

In [111]:
# URL of the simulated Twitter search response (a JSON document) that the cells below download.
tweetData = "https://prototype.visualization.vpr.psu.edu/open/workshop/fromTwitterSample.json"

In [112]:
#Used to make requests (urllib.urlopen is the Python 2 spelling)
import urllib

# Download the raw response body. The response object is closed explicitly
# so the underlying connection is released even if read() raises.
f = urllib.urlopen(tweetData)
try:
    rawTweets = f.read()
finally:
    f.close()

# Last expression of the cell: show the raw text exactly as it was received.
rawTweets

'{"statuses":[{"created_at":"Mon Dec 03 16:09:07 +0000 2018","id":1069624582456381440,"id_str":"1069624582456381440","text":"Very excited for tomorrow\'s workshop featuring #python #jupyter and #twitterAPI (https:\\/\\/t.co\\/hhZnighmNJ)","truncated":false,"entities":{"hashtags":[{"text":"python","indices":[47,54]},{"text":"jupyter","indices":[55,63]},{"text":"twitterAPI","indices":[68,79]}],"symbols":[],"user_mentions":[],"urls":[{"url":"https:\\/\\/t.co\\/hhZnighmNJ","expanded_url":"https:\\/\\/www.eventbrite.com\\/e\\/python-workshops-registration-49943881494","display_url":"eventbrite.com\\/e\\/python-works\\u2026","indices":[81,104]}]},"metadata":{"iso_language_code":"en","result_type":"recent"},"source":"\\u003ca href=\\"http:\\/\\/twitter.com\\" rel=\\"nofollow\\"\\u003eTwitter Web Client\\u003c\\/a\\u003e","in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":126798187,

The response looks like JSON, so let's parse it with Python's `json` module and pretty-print the result.

In [113]:
import json

# Request the sample feed again and decode its body into Python objects.
response = urllib.urlopen(tweetData)
data = json.loads(response.read())

# Re-serialise with two-space indentation so the nesting is easy to follow.
prettyPayload = json.dumps(data, indent=2)
print(prettyPayload)

{
  "search_metadata": {
    "count": 1, 
    "completed_in": 0.068, 
    "max_id_str": "1069624582456381440", 
    "since_id_str": "0", 
    "next_results": "?max_id=1069624582456381439&q=pdudders&count=1&include_entities=1&result_type=recent", 
    "refresh_url": "?since_id=1069624582456381440&q=pdudders&result_type=recent&include_entities=1", 
    "since_id": 0, 
    "query": "pdudders", 
    "max_id": 1069624582456381440
  }, 
  "statuses": [
    {
      "contributors": null, 
      "truncated": false, 
      "text": "Very excited for tomorrow's workshop featuring #python #jupyter and #twitterAPI (https://t.co/hhZnighmNJ)", 
      "is_quote_status": false, 
      "in_reply_to_status_id": null, 
      "id": 1069624582456381440, 
      "favorite_count": 0, 
      "entities": {
        "symbols": [], 
        "user_mentions": [], 
        "hashtags": [
          {
            "indices": [
              47, 
              54
            ], 
            "text": "python"
          }, 
  

In [114]:
# Fetch and decode the feed, then look only at the first status (tweet).
response = urllib.urlopen(tweetData)
data = json.loads(response.read())
firstStatus = data["statuses"][0]
print(json.dumps(firstStatus, indent=2))

{
  "contributors": null, 
  "truncated": false, 
  "text": "Very excited for tomorrow's workshop featuring #python #jupyter and #twitterAPI (https://t.co/hhZnighmNJ)", 
  "is_quote_status": false, 
  "in_reply_to_status_id": null, 
  "id": 1069624582456381440, 
  "favorite_count": 0, 
  "entities": {
    "symbols": [], 
    "user_mentions": [], 
    "hashtags": [
      {
        "indices": [
          47, 
          54
        ], 
        "text": "python"
      }, 
      {
        "indices": [
          55, 
          63
        ], 
        "text": "jupyter"
      }, 
      {
        "indices": [
          68, 
          79
        ], 
        "text": "twitterAPI"
      }
    ], 
    "urls": [
      {
        "url": "https://t.co/hhZnighmNJ", 
        "indices": [
          81, 
          104
        ], 
        "expanded_url": "https://www.eventbrite.com/e/python-workshops-registration-49943881494", 
        "display_url": "eventbrite.com/e/python-works\u2026"
      }
    ]
  }, 
  "

Let's go through all the tweets and print each one's message (its `text` field).

In [115]:
# Fetch and decode the feed, then print the message of every status in it.
response = urllib.urlopen(tweetData)
data = json.loads(response.read())
statuses = data["statuses"]
for tweet in statuses:
    print(tweet["text"])

Very excited for tomorrow's workshop featuring #python #jupyter and #twitterAPI (https://t.co/hhZnighmNJ)


Now use regular expressions to find all hashtags. We are going to create a graph using this data: every hashtag becomes a node, and two hashtags that appear in the same tweet are joined by a link.

In [120]:
import re
import itertools
import networkx as nx #https://networkx.github.io/

# Download and decode the sample search result.
response = urllib.urlopen(tweetData)
data = json.load(response)

nodes = []  # every distinct hashtag, in the order it was first seen
links = []  # [source, target] pairs of hashtags that share a tweet
for tweets in data["statuses"]:
    # A hashtag is a '#' followed by one or more word characters.
    matches = re.findall(r'\#\w+', tweets["text"])
    print(matches)

    # Collapse repeats inside a single tweet; otherwise a tweet such as
    # "#python #python" would produce a link from a hashtag to itself
    # and the same pair could be emitted more than once for one tweet.
    tagsInTweet = []
    for hashtagsFound in matches:
        if hashtagsFound not in tagsInTweet:
            tagsInTweet.append(hashtagsFound)
        if hashtagsFound not in nodes:
            nodes.append(hashtagsFound)

    # One link per unordered pair of distinct hashtags in this tweet,
    # kept in order of appearance (earlier hashtag is the source).
    for source, target in itertools.combinations(tagsInTweet, 2):
        links.append([source, target])
print(nodes)
print(links)

[u'#python', u'#jupyter', u'#twitterAPI']
[u'#python', u'#jupyter', u'#twitterAPI']
[[u'#python', u'#jupyter'], [u'#python', u'#twitterAPI'], [u'#jupyter', u'#twitterAPI']]


In [121]:
import json
from collections import OrderedDict
outputFile = "graph.json"

# Node records keyed by hashtag. The OrderedDict keeps first-seen order and
# drops repeated names, so each node has exactly one position in the output.
printNodes = OrderedDict()
for n in nodes:
    printNodes[n] = {'name': n}

# Position of every node in the emitted "nodes" array; links refer to
# their endpoints by this position.
nodeIndex = dict((name, position) for position, name in enumerate(printNodes))

# Link records keyed by the textual form of the pair, so an identical
# [source, target] pair coming from several tweets is written only once.
printLinks = OrderedDict()
for link in links:
    printLinks[str(link)] = {
        'source': nodeIndex[link[0]],
        'target': nodeIndex[link[1]],
    }

# Assemble the whole document and let the json module serialise it. This
# stays valid JSON when there are no nodes or no links, where trimming a
# trailing ",\n" from a hand-built string would cut into the brackets.
graph = OrderedDict()
graph['nodes'] = list(printNodes.values())
graph['links'] = list(printLinks.values())
j = json.dumps(graph, indent=2)  # JSON text

# The with-block closes the file even if the write fails.
with open(outputFile, 'w') as f:
    f.write(j)

Now let's create a graph! The first cell below loads `graph.json` into the browser, and the second one draws it with D3.

In [122]:
%%javascript
// Load D3 v4 through RequireJS (home directory of D3.JS: https://d3js.org/).
require.config({paths: {d3: "//d3js.org/d3.v4.min"}});

// Create the network: read graph.json and publish its nodes and links as
// page-level arrays (window.nodes / window.links).
require(["d3"], function(d3) {
    window.nodes = [];
    window.links = [];

    // Copy one node record, remembering its position in the file.
    function addNode(record, position) {
        nodes.push({
            name: record.name,
            index: position
        });
    }

    // Replace a link's numeric endpoints with the node objects they point at.
    function addLink(record, position) {
        links.push({
            source: nodes[record.source],
            target: nodes[record.target],
            index: position
        });
    }

    // The callback runs asynchronously, once graph.json has been fetched.
    d3.json("graph.json", function(error, graph) {
        console.log(graph);
        if (error) {
            throw error;
        }
        graph.nodes.forEach(addNode);
        graph.links.forEach(addLink);
    });
});

<IPython.core.display.Javascript object>

In [119]:
%%javascript
element.append('<div id="graph1" style="min-width: 310px; height: 1000px; margin: 0 auto"></div>');
// Load D3 v4 through RequireJS (home directory of D3.JS: https://d3js.org/).
require.config({paths: {d3: "//d3js.org/d3.v4.min"}});
// Force-directed graph adapted from https://bl.ocks.org/mbostock/4062045
// Reads the page-level `nodes` and `links` arrays.
require(["d3"], function(d3) {
    var WIDTH = 800;
    var HEIGHT = 1000;

    // Empty the container before drawing to keep the screen clean.
    var container = d3.select("div#graph1");
    container.selectAll("*").remove();

    // Drawing surface.
    var svg = container.append("svg")
        .attr("width", WIDTH + "px")
        .attr("height", HEIGHT + "px");

    // Colour scheme (https://github.com/d3/d3-scale).
    var color = d3.scaleOrdinal(d3.schemeCategory20);

    // Physics (https://github.com/d3/d3-force): a link force, a many-body
    // charge force, and a force centring the layout in the SVG.
    var linkForce = d3.forceLink().id(function(d) { return d.index; });
    var simulation = d3.forceSimulation()
        .force("link", linkForce)
        .force("charge", d3.forceManyBody())
        .force("center", d3.forceCenter(WIDTH / 2, HEIGHT / 2));

    // Drag handlers: pin the node under the pointer while it is dragged and
    // release it afterwards.
    function onDragStart(d) {
        if (!d3.event.active) {
            simulation.alphaTarget(0.3).restart();
        }
        d.fx = d.x;
        d.fy = d.y;
    }

    function onDrag(d) {
        d.fx = d3.event.x;
        d.fy = d3.event.y;
    }

    function onDragEnd(d) {
        if (!d3.event.active) {
            simulation.alphaTarget(0);
        }
        d.fx = null;
        d.fy = null;
    }

    var dragBehaviour = d3.drag()
        .on("start", onDragStart)
        .on("drag", onDrag)
        .on("end", onDragEnd);

    // One black, 2px-wide SVG line per link
    // (https://www.w3schools.com/graphics/svg_line.asp).
    var link = svg.append("g")
        .attr("class", "links")
        .selectAll("line")
        .data(links)
        .enter()
        .append("line")
        .style("stroke", "black")
        .style("stroke-width", "2px");

    // One draggable SVG circle per node
    // (https://www.w3schools.com/graphics/svg_circle.asp).
    // NOTE(review): the fill is keyed on d.group; if the nodes carry only
    // `name` and `index` (as the graph.json loader cell builds them), every
    // circle gets the same colour - confirm whether that is intended.
    var node = svg.append("g")
        .attr("class", "nodes")
        .selectAll("circle")
        .data(nodes)
        .enter()
        .append("circle")
        .attr("r", 5)
        .attr("fill", function(d) { return color(d.group); })
        .call(dragBehaviour);

    // Tooltip: the node's name is shown on hover.
    node.append("title")
        .text(function(d) { return d.name; });

    // On every simulation step, move the lines and circles to the latest positions.
    function onTick() {
        link
            .attr("x1", function(d) { return d.source.x; })
            .attr("y1", function(d) { return d.source.y; })
            .attr("x2", function(d) { return d.target.x; })
            .attr("y2", function(d) { return d.target.y; });

        node
            .attr("cx", function(d) { return d.x; })
            .attr("cy", function(d) { return d.y; });
    }

    // Hand the data to the simulation: nodes first, then the links between them.
    simulation
        .nodes(nodes)
        .on("tick", onTick);

    linkForce.links(links);
});

<IPython.core.display.Javascript object>