### This notebook contains JS which can only be viewed with nbviewer

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import re
import pickle
import os
from functools import partial
import infomap

### Json Description:
jsonL is a collection of json objects containing metadata
about the topics on the forums

In [2]:
# Deserialize the scraped forum metadata.
# NOTE: pickle.load can execute arbitrary code; only open pickles you created yourself.
with open("json10k.p", mode="rb") as pickle_file:
    jsonL = pickle.load(pickle_file)

### Graph Description:
LikeG is a graph such that:
- Nodes represent individual players
- There is an edge $u \rightarrow v$ with weight $w$ if player $u$ has given $w$ total likes to player $v$.

In [3]:
# Load the player "likes" graph (described above) from its GraphML file.
LikeG = nx.read_graphml('ArenaLikeG.graphml')

In [4]:
# Report the size of the like graph.
print(f"Num nodes: {len(LikeG.nodes)}")
print(f"Num edges: {len(LikeG.edges)}")

Num nodes: 15163
Num edges: 156852


In [5]:
# Add up 'posts_count' over every topic of every scraped JSON object.
num_posts_scraped = sum(
    topic['posts_count']
    for listing in jsonL
    for topic in listing['topic_list']['topics']
)
print(f"Num posts scraped: {num_posts_scraped}")

Num posts scraped: 284275


### Estimate size of data accessed by selenium
 - Chrome -> dev console -> Network -> reload a topic that contains only one post, read off the X/Y resources figure loaded by the page, and report X*num_posts (X is taken as 1 MB in the cell below).

In [6]:
# Rough estimate of the data volume: a fixed per-post size times the post count.
size_of_post_in_MB = 1
size_scraped = num_posts_scraped * size_of_post_in_MB
print(f"Estimate on size of data accessed: {size_scraped} MB")

Estimate on size of data accessed: 284275 MB


### Here we'll compute some node characteristics of LikeG
- Closeness centrality
- Betweenness centrality
- Indegree
- Outdegree
- Indegree/Outdegree
- Clustering Coefficient

In [7]:
# Per-node statistics of LikeG, cached in node_data.csv so the centrality
# computations only have to run when the cache file is missing.
try:
    node_data = pd.read_csv('node_data.csv')
except OSError:
    print("Couldn't read file.")
    node_data = pd.DataFrame(index=list(LikeG.nodes))
    # Every networkx call below yields node -> value pairs. Wrapping them in a
    # Series aligns the values to rows by node label rather than by position.
    node_data['ClusterCoefficient'] = pd.Series(nx.clustering(LikeG, weight='weight'))
    node_data['Indegree'] = pd.Series(dict(LikeG.in_degree(weight='weight'))).astype(int)
    node_data['Outdegree'] = pd.Series(dict(LikeG.out_degree(weight='weight'))).astype(int)
    # NOTE(review): the like count is used as the path *distance* here, so heavily
    # liked connections count as "far apart" -- confirm this is intended.
    node_data['ClosenessCentrality'] = pd.Series(nx.closeness_centrality(LikeG, distance='weight'))
    node_data['BetweennessCentrality'] = pd.Series(nx.betweenness_centrality(LikeG, weight='weight'))
    # Users who never gave a like have Outdegree 0, which yields inf/NaN here.
    node_data["InOverOut"] = node_data["Indegree"] / node_data["Outdegree"]
    node_data['Activity'] = node_data['Indegree'] + node_data['Outdegree']
    node_data.to_csv('node_data.csv')
    # Reload from the cache so a cache miss produces exactly the same frame as a
    # cache hit: player names in an 'Unnamed: 0' column with a default integer
    # index, which is what the following cells rely on.
    node_data = pd.read_csv('node_data.csv')

### Let's see the most central users

In [8]:
# The CSV round trip leaves the player names in a column labelled 'Unnamed: 0'; give it a real name.
node_data = node_data.rename(columns={'Unnamed: 0': 'name'})

In [9]:
# Ten users with the highest betweenness centrality, highest first.
ranked_by_betweenness = node_data.sort_values(by='BetweennessCentrality', ascending=False)
ranked_by_betweenness.head(10)

Unnamed: 0,name,ClusterCoefficient,Indegree,Outdegree,ClosenessCentrality,BetweennessCentrality,InOverOut,Activity
113,Efx-moknathal,0.00018,9972,3749,0.308478,0.022111,2.659909,13721
8,Survïvalïst-bleeding-hollow,0.000215,2108,8833,0.295629,0.019726,0.238651,10941
5,Exalter-malganis,0.000232,5146,3200,0.297711,0.013667,1.608125,8346
84,Rethumtv-tichondrius,0.000232,4273,1487,0.300508,0.012255,2.873571,5760
25,Dillon-sargeras,0.000147,10224,279,0.309096,0.012079,36.645161,10503
206,Kydrav-bleeding-hollow,0.000291,2771,2489,0.300759,0.010596,1.113299,5260
64,Thellendir-sargeras,0.000289,3824,3101,0.297128,0.010227,1.233151,6925
1471,Hexxes-malganis,0.000235,1829,1213,0.304584,0.009876,1.507832,3042
2372,Remixxed-malganis,0.000234,3571,1198,0.30143,0.009725,2.980801,4769
239,Nawat-emerald-dream,0.000272,871,2207,0.291473,0.008528,0.394653,3078


### The most liked users

In [10]:
# Ten users who received the most likes (weighted indegree), highest first.
ranked_by_indegree = node_data.sort_values(by='Indegree', ascending=False)
ranked_by_indegree.head(10)

Unnamed: 0,name,ClusterCoefficient,Indegree,Outdegree,ClosenessCentrality,BetweennessCentrality,InOverOut,Activity
25,Dillon-sargeras,0.000147,10224,279,0.309096,0.012079,36.645161,10503
113,Efx-moknathal,0.00018,9972,3749,0.308478,0.022111,2.659909,13721
5,Exalter-malganis,0.000232,5146,3200,0.297711,0.013667,1.608125,8346
111,Husbandied-barthilas,0.000386,4504,2228,0.297738,0.006313,2.021544,6732
84,Rethumtv-tichondrius,0.000232,4273,1487,0.300508,0.012255,2.873571,5760
64,Thellendir-sargeras,0.000289,3824,3101,0.297128,0.010227,1.233151,6925
2372,Remixxed-malganis,0.000234,3571,1198,0.30143,0.009725,2.980801,4769
87,Laoghaire-emerald-dream,0.000309,3439,2290,0.30115,0.008203,1.501747,5729
206,Kydrav-bleeding-hollow,0.000291,2771,2489,0.300759,0.010596,1.113299,5260
1291,Orphanchild-scilla,0.000299,2754,355,0.291481,0.003976,7.757746,3109


### Dillon-sargeras is currently the only active community moderator

### Restrict to only the most active users when searching for community moderators

In [11]:
# Keep only users whose Activity is strictly above the 95th percentile.
activity_cutoff = node_data['Activity'].quantile(.95)
active_users = node_data[node_data['Activity'] > activity_cutoff]

In [12]:
# Number of users that passed the activity filter.
active_users.shape[0]

757

In [13]:
# Restrict the like graph to the active users only.
active_user_names = active_users['name'].tolist()
active_user_graph = LikeG.subgraph(active_user_names)

In [14]:
# 95th percentile of the edge weights among active users.
edge_weights = [active_user_graph.edges[u, v]['weight'] for u, v in active_user_graph.edges]
np.percentile(edge_weights, 95)

9.0

In [15]:
# Edges carrying at least 10 likes, i.e. just above the 95th-percentile weight shown above.
weighty_edges = [
    (u, v)
    for u, v in active_user_graph.edges
    if active_user_graph.edges[u, v]['weight'] >= 10
]

In [16]:
# Graph induced by the heavy edges only.
# edge_subgraph returns a view whose node/edge attribute dicts are shared with
# the parent graph; .copy() gives to_plot its own attributes, so the
# 'community' and 'name' values assigned to its nodes later do not leak back
# into LikeG (or into a re-run of the Pajek export).
to_plot = active_user_graph.edge_subgraph(weighty_edges).copy()

### Setup for community detection: requires a binary detection file

In [17]:
# Export the filtered graph in Pajek format, the input format passed to Infomap below.
nx.write_pajek(to_plot, 'to_plot.paj')

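### Thresholding on edge weight vastly limits the number of edges; otherwise Infomap produces one large community. Note that Infomap's `--weight-threshold` option is not passed below: the low-weight edges were already removed above (weight >= 10) before the export. TODO: Try directed Louvain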

In [18]:
# Run the Infomap command-line tool on the exported Pajek file as a directed network.
# Results are written to the InfoClusters directory; InfoClusters/to_plot.clu is read back in the next cell.
!infomap -i pajek --directed --preferred-number-of-modules 20   --ftree --clu --clu-level -1 to_plot.paj InfoClusters

  Infomap v1.1.4 starts at 2020-10-12 16:13:28
  -> Input network: to_plot.paj
  -> Output path:   InfoClusters/
  -> Configuration: input-format = pajek
                    ftree
                    clu
                    clu-level = -1
                    directed
                    preferred-number-of-modules = 20
Parsing directed pajek network from file 'to_plot.paj'... 
  Parsing vertices...
  -> 478 physical nodes added
  Parsing links...
  -> 2758 links
Done!
-------------------------------------
  -> 478 nodes
  -> 2758 links with total weight 64224
  -> Ordinary network input, using the Map Equation for first order network flows
Calculating global network flow using flow model 'directed'... 
  -> Using unrecorded teleportation to links. 
  -> PageRank calculation done in 50 iterations.

  => Sum node flow: 1, sum link flow: 1
Build internal network with 478 nodes and 2758 links...
Calculating one-level codelength... done!
 -> One-level codelength: 6.00132098
Calculating entr

In [19]:
# Parse the Infomap .clu output: skip '#' header lines and split every
# remaining line on single spaces.
clus_data = []
with open('InfoClusters/to_plot.clu', 'r') as clu_file:
    for raw_line in clu_file:
        if raw_line.startswith('#'):
            continue
        clus_data.append(raw_line.strip().split(' '))

In [20]:
# One row per clustered node; all values are still strings at this point.
clu_columns = ['node_id', 'module', 'flow']
cluster_df = pd.DataFrame(data=clus_data, columns=clu_columns)

In [21]:
# Build the typed frame from the raw parsed rows instead of converting
# cluster_df in place, so that re-running this cell cannot subtract 1 from
# node_id a second time.
cluster_df = pd.DataFrame(clus_data, columns=['node_id', 'module', 'flow']).astype(
    {'node_id': 'int', 'module': 'int', 'flow': 'float'}
)
# Shift the ids down by one so they can be used as 0-based positions into
# the node list built from to_plot.
cluster_df['node_id'] = cluster_df['node_id'] - 1

In [22]:
# Check the conversion: node_id and module should be integers, flow a float.
cluster_df.dtypes

node_id      int32
module       int32
flow       float64
dtype: object

In [23]:
# Node labels of to_plot in iteration order; list positions are matched against node_id below.
nodeL = list(to_plot)

In [24]:
# Translate each 0-based node_id into the player label stored at that position of nodeL.
# NOTE(review): assumes the Pajek export numbered nodes in the same order as nodeL -- confirm.
cluster_df['name'] = [nodeL[position] for position in cluster_df['node_id']]

In [25]:
# Index the cluster table by player name so rows can be looked up by node label.
cluster_df = cluster_df.set_index(keys='name')

In [28]:
# Preview the first five rows of the cluster table.
cluster_df.head(n=5)

Unnamed: 0_level_0,node_id,module,flow
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Efx-moknathal,205,1,0.088246
Exalter-malganis,39,1,0.03855
Laoghaire-emerald-dream,65,1,0.028283
Rethumtv-tichondrius,233,1,0.028279
Thellendir-sargeras,259,1,0.02642


In [29]:
# Attach the whole-graph node statistics to every clustered player
# (cluster_df is keyed by its index, node_data by its 'name' column).
cluster_data = pd.merge(
    cluster_df,
    node_data,
    how='inner',
    left_index=True,
    right_on='name',
)

### Report the most central figure in each community along with the GLOBAL centrality (in graph with all players)
- These will represent a diverse set of choices for community moderators

In [30]:
# Row label of the member with the highest betweenness centrality in each module,
# then show those rows.
top_per_module = cluster_data.groupby('module')['BetweennessCentrality'].idxmax()
cluster_data.loc[top_per_module, ['name', 'module', 'BetweennessCentrality']]

Unnamed: 0,name,module,BetweennessCentrality
113,Efx-moknathal,1,0.022111
2372,Remixxed-malganis,2,0.009725
12,Covlol-illidan,3,0.003722
25,Dillon-sargeras,4,0.012079
394,Boltqt-malygos,5,0.002073
158,Dozêr-sargeras,6,0.004401
4438,Stumblin-emerald-dream,7,0.005834
379,Abombanation-darkspear,8,0.007935
1151,Fox-azshara,9,0.002945
55,Haugs-sargeras,10,0.002671


In [31]:
# Store the Infomap module and the player label on every node so both
# survive the integer relabelling and end up in the exported JSON.
for player in to_plot.nodes:
    to_plot.nodes[player].update(
        community=int(cluster_df.loc[player, 'module']),
        name=player,
    )

In [32]:
# Replace the node labels with integers; the player label was saved in each node's 'name' attribute beforehand.
to_plot = nx.convert_node_labels_to_integers(to_plot)

In [33]:
from networkx.readwrite import json_graph
import json

In [34]:
# Convert the graph into node-link form; the D3 code below reads its 'nodes' and 'links' entries.
json_data = json_graph.node_link_data(to_plot)

In [35]:
# Write the node-link data to disk; ensure_ascii=False keeps non-ASCII player names readable.
with open('to_plot.json', 'w+', encoding='utf8') as json_file:
    json.dump(json_data, json_file, ensure_ascii=False, indent=4)

In [36]:
%%html
<!-- Container that the %%javascript cell selects by id to draw the graph,
     plus the styles for the "node" and "link" classes it assigns. -->
<div id="d3-example"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
.link {stroke: #999; stroke-opacity: .6;}
</style>

In [37]:
%%javascript
// Note: v4->v5 breaks this code
// In v5 promises are used to load
// json, so I'd have to add a .then...
// Color arrays with 20 colors
// have also been changed from v4 to v5

// We load the d3.js library from the Web.
require.config({paths:
    {d3: "https://d3js.org/d3.v4.min"}});
require(["d3"], function(d3) {
  // The code in this block is executed when the
  // d3.js library has been loaded.

  // Builds the drag behaviour for the nodes: while a node is dragged it is
  // pinned (fx/fy) to the pointer position, and released again on drop.
  const drag = simulation => {

    function dragstarted(d) {
      // Re-heat the simulation so the layout reacts to the drag.
      if (!d3.event.active) simulation.alphaTarget(0.3).restart();
      d.fx = d.x;
      d.fy = d.y;
    }

    function dragged(d) {
      d.fx = d3.event.x;
      d.fy = d3.event.y;
    }

    function dragended(d) {
      // Let the simulation cool down again and unpin the node.
      if (!d3.event.active) simulation.alphaTarget(0);
      d.fx = null;
      d.fy = null;
    }

    return d3.drag()
      .on("start", dragstarted)
      .on("drag", dragged)
      .on("end", dragended);
  };

  // First, we specify the size of the canvas
  // containing the visualization (size of the
  // <div> element).
  var width = 1200, height = 1200;

  // We create a color scale.
  var color = d3.scaleOrdinal(d3.schemeCategory20);

  // We create a force-directed dynamic graph layout.
  var sim = d3.forceSimulation()
    .force("charge", d3.forceManyBody())
    .force("center", d3.forceCenter(width / 2, height / 2));

  // In the <div> element, we create a <svg> graphic
  // that will contain our interactive visualization.
  // An existing <svg> is reused so re-running the cell
  // does not stack a second drawing under the first.
  var svg = d3.select("#d3-example").select("svg");
  if (svg.empty()) {
    svg = d3.select("#d3-example").append("svg")
          .attr("width", width)
          .attr("height", height);
  }

  // We load the JSON file.
  d3.json("to_plot.json", function(error, graph) {
    // Surface a failed load (missing file, bad JSON) directly instead of
    // failing later with an unrelated error when 'graph' is undefined.
    if (error) throw error;

    // In this block, the file has been loaded
    // and the 'graph' object contains our graph.

    // We load the nodes and links in the
    // force-directed graph.
    sim.nodes(graph.nodes);

    // We create a <line> SVG element for each link
    // in the graph.
    var link = svg.selectAll(".link")
      .data(graph.links)
      .enter().append("line")
      .attr("class", "link");

    // We create a <circle> SVG element for each node
    // in the graph, and we specify a few attributes.
    var node = svg.selectAll(".node")
      .data(graph.nodes)
      .enter().append("circle")
      .attr("class", "node")
      .attr("r", 5)  // radius
      .style("fill", function(d) {
         // The node color depends on the node's community.
         return color(d.community);
      })
      .call(drag(sim));

    sim.force("link", d3.forceLink(graph.links));

    // The name of each node is the player name.
    node.append("title")
        .text(function(d) { return d.name; });

    // We bind the positions of the SVG elements
    // to the positions of the dynamic force-directed
    // graph, at each time step.
    sim.on("tick", function() {
      link.attr("x1", function(d){return d.source.x})
          .attr("y1", function(d){return d.source.y})
          .attr("x2", function(d){return d.target.x})
          .attr("y2", function(d){return d.target.y});

      node.attr("cx", function(d){return d.x})
          .attr("cy", function(d){return d.y});
    });
  });
});

<IPython.core.display.Javascript object>

In [33]:
# Force a garbage-collection pass; the number shown below is gc.collect()'s return value.
import gc
gc.collect()

113