# Social Rank and Hierarchy
Network analysis can be used to examine social status within a community. This lab will use data from the 1995-1997 Teenage Friends and Lifestyle study [MA1997]. This study collected friendship and other data from a group of 50 teenage girls over the course of three years.

In [None]:
%pylab inline
import itertools
import json
import math
import urllib.request
import networkx as nx
import networkx as networkx
import networkx.algorithms as nxalg
import networkx.algorithms.community as nxcom
import networkx.readwrite as nxrw
import numpy as np
import pandas as pd
import visJS2jupyter.visJS_module as vjs
import re
from springrank.SpringRank_tools import SpringRank
from springrank.tools import build_graph_from_adjacency


In [None]:
# Helper functions

def load_social():
    """Load the Facebook wall-post network as an unweighted directed graph.

    Reads ``external/facebook-wosn-wall/out.facebook-wosn-wall``. Lines that
    start with ``%`` are skipped as header/comment lines and blank lines are
    ignored. Every other line must contain exactly four whitespace-separated
    integers, unpacked as ``source target weight timestamp``.

    Only the (source, target) pair is kept: repeated pairs collapse into a
    single edge, and the weight and timestamp columns are not stored on the
    graph.

    Returns:
        networkx.DiGraph: one edge per distinct (source, target) pair.

    Raises:
        ValueError: if a data line does not hold exactly four integers. The
            message names the offending line.
    """
    path = "external/facebook-wosn-wall/out.facebook-wosn-wall"
    G = nx.DiGraph()
    with open(path) as f:
        for line_number, row in enumerate(f, start=1):
            # Header/comment lines start with "%"
            if row.startswith("%"):
                continue
            fields = row.split()
            if not fields:
                # Blank line: nothing to parse
                continue
            try:
                source, target, _weight, _timestamp = (int(x) for x in fields)
            except ValueError as err:
                raise ValueError(
                    "{}:{}: expected 4 integer fields, got {!r}".format(
                        path, line_number, row)) from err
            # Adding an edge that already exists is a no-op, so repeated
            # wall posts between the same pair leave a single edge.
            G.add_edge(source, target)
    return G

def load_tfls():
    """Load the friendship adjacency matrix as a directed graph.

    Reads ``external/s50_data/s50-network1.dat`` as a whitespace-separated
    numeric matrix. A non-zero cell at (row, col) becomes an edge from node
    ``row`` to node ``col`` (both 0-based), with the cell value stored as a
    float in the ``weight`` and ``value`` edge attributes.

    Nodes are created only through edges, so a row/column index that has no
    non-zero cell in either direction does not appear in the graph. Each
    node gets a ``label`` attribute equal to its own index.

    Returns:
        networkx.DiGraph: the friendship network.

    Raises:
        ValueError: if a cell cannot be parsed as a number.
    """
    G = nx.DiGraph()
    with open("external/s50_data/s50-network1.dat") as f:
        for row, row_data in enumerate(f):
            # str.split() tolerates leading/trailing and repeated whitespace
            for col, cell_data in enumerate(row_data.split()):
                cell_value = float(cell_data)
                if cell_value != 0:
                    # Store numbers, not the raw text, so weighted
                    # algorithms see numeric weights.
                    G.add_edge(row, col, weight=cell_value, value=cell_value)
    nx.set_node_attributes(G, {v: v for v in G.nodes()}, name="label")
    return G

def visualize_visjs(
        G, communities=None, colors=None, default_color="192,192,192",
        node_size_field="node_size", layout="spring", scale=500, pos=None,
        groups=None, weight=None, labels=None, title=""):
    """Draw a networkx graph with visJS2jupyter.

    Args:
        G: networkx graph to draw. Arrows are drawn when ``G.is_directed()``.
        communities: optional collection of node collections. Communities are
            ranked by size (largest first); the rank selects the node color
            and, when ``pos`` is not given, seeds the initial node positions.
        colors: list of ``"r,g,b"`` strings indexed by community rank.
            Defaults to a built-in six-color palette.
        default_color: ``"r,g,b"`` string used for nodes whose community rank
            is beyond the palette, and for every edge that does not join two
            nodes of the same palette-colored community.
        node_size_field: accepted for interface compatibility; the node size
            field passed to visJS is always ``"node_size"``.
        layout: ``"circle"`` or ``"spring"`` to compute positions with
            networkx; any other value uses ``pos`` as the final positions and
            switches to wider, curved edges.
        scale: ``scale`` argument for the networkx layout functions.
        pos: dict mapping node -> (x, y). Initial positions for the spring
            layout, final positions for non-networkx layouts, ignored for the
            circle layout.
        groups: optional collection of nodes drawn as squares instead of dots.
        weight: name of the edge attribute used as the per-edge width; edges
            without it (or when ``weight`` is None) get width 1.
        labels: optional dict mapping node -> id shown for that node; nodes
            not in the dict use the node itself.
        title: hashed to produce the ``graph_id`` passed to visJS.

    Returns:
        The return value of ``vjs.visjs_network``.

    Raises:
        ValueError: if ``layout`` is neither ``"circle"`` nor ``"spring"`` and
            no positions are available from ``pos`` or ``communities``.
    """
    if labels is None:
        labels = {}
    nodes = list(G.nodes())
    # Nodes listed in `groups` are drawn as squares (e.g. one side of a
    # bipartite graph); every other node is a dot.
    node_shapes = {} if groups is None else {n: "square" for n in groups}
    # Per-node properties
    nodes_dict = {
        n: {
            "id": labels.get(n, n),
            "node_size": 5,
            "node_shape": node_shapes.get(n, "dot"),
        }
        for n in nodes}
    # Rank communities once, largest first; the rank is both the layout
    # order and the palette index.
    if communities is None:
        ranked_communities = []
    else:
        ranked_communities = sorted(communities, key=len, reverse=True)
    if communities is not None and pos is None:
        # Seed positions so members of a community start next to each other:
        # nodes are ordered by (community rank, node) and spread along an
        # arc in steps of phi.
        phi = 3.14 / max(len(nodes), 1)
        community_node = sorted(
            (i, node)
            for i, com in enumerate(ranked_communities)
            for node in com)
        pos = {
            node: (math.cos(i * phi), math.sin(i * phi))
            for i, (_, node) in enumerate(community_node)}
    # Generate a layout for the nodes
    edge_smooth_enabled = False
    edge_width = 4
    edge_arrow_scale = 1
    if layout == "circle":
        pos = nx.circular_layout(G, scale=scale)
    elif layout == "spring":
        # Guard the empty graph, where 3/sqrt(0) would divide by zero
        k = 3 / math.sqrt(len(nodes)) if nodes else None
        pos = nx.spring_layout(G, k=k, scale=scale, pos=pos)
    else:
        if pos is None:
            raise ValueError(
                "layout {!r} needs explicit node positions: pass pos= "
                "(or communities=)".format(layout))
        # Caller-supplied positions: use wider, curved edges
        edge_smooth_enabled = True
        edge_width = 20
        edge_arrow_scale = 2
    for n in nodes:
        # Store plain scalars; a trailing comma here would turn x into a
        # 1-tuple.
        nodes_dict[n]["x"] = pos[n][0]
        nodes_dict[n]["y"] = pos[n][1]
    # If we have communities, assign color based on community
    if colors is None:
        colors = [
            "0,255,0",
            "0,0,255",
            "0,255,255",
            "127,127,0",
            "255,0,0",
            "127,127,255"]
    for i, com in enumerate(ranked_communities):
        for node in com:
            try:
                rgb = colors[i]
            except IndexError:
                # More communities than palette entries: default color and
                # no color_index, so edges inside it stay default-colored.
                nodes_dict[node]["color"] = "rgba({},1)".format(default_color)
            else:
                nodes_dict[node]["color"] = "rgba({},1)".format(rgb)
                nodes_dict[node]["color_index"] = i
    # Map node labels to contiguous ids
    node_map = dict(zip(nodes, range(len(nodes))))
    # An edge takes its community's color only when both endpoints share the
    # same palette-colored community.
    edge_colors = {}
    for source, target in G.edges():
        source_color = nodes_dict[source].get("color_index")
        target_color = nodes_dict[target].get("color_index")
        if source_color is not None and source_color == target_color:
            edge_colors[(source, target)] = colors[source_color]
    # Per-edge properties, use contiguous ids to identify nodes
    # NOTE(review): every edge gets the literal title 'test' — looks like a
    # leftover placeholder; confirm before changing.
    edges_dict = [{
        "source": node_map[source],
        "target": node_map[target],
        "title":'test',
        "edge_width": data.get(weight, 1),
        "color": "rgba({},0.4)".format(
            edge_colors.get((source, target), default_color))}
        for source, target, data in G.edges(data=True)]
    # Convert nodes dict to node list (same order as node_map)
    nodes_list = [nodes_dict[n] for n in nodes]
    # Draw arrow heads for any directed graph type
    directed = G.is_directed()
    # Call visjs
    return vjs.visjs_network(
        nodes_list, edges_dict,
        node_size_field="node_size",
        node_size_multiplier=10.0,
        edge_width=edge_width,
        edge_arrow_to=directed,
        edge_arrow_to_scale_factor=edge_arrow_scale,
        edge_smooth_enabled=edge_smooth_enabled,
        edge_smooth_type="curvedCW",
        graph_id=hash(title))

## Directed Networks
### Load and visualize the social network data
Now we'll load and visualize the friendships. The [MA1997] study determined friendships by asking participants to name their top friends. This method has an interesting feature: it is possible for a participant to list someone as a friend who does not list the participant as a friend. The friendship ties are _directed_. In the visualization, arrows go from the participant to the individuals they named as friends. Reciprocated friendships have arrows on both ends.

In [None]:
# Build the directed friendship graph from the study's adjacency matrix
G = load_tfls()
# Last expression of the cell, so its return value is what the notebook shows
visualize_visjs(G, scale=750, title="Full Network")

## Social Status
### Pecking Orders
When members of a social group can be ordered in terms of social status, it forms a pecking order. Directed network data can be used to uncover an underlying pecking order if it exists. An unreciprocated friendship can be a sign of a difference in social standing. The person who doesn't list the friendship may omit it because they have too many friends to list, or because they don't want to list someone unpopular.

To find a pecking order, we look for a _minimum-violation ranking_: an ordering with as few links going from high-status to low-status individuals as possible.
Such an ordering may or may not exist.
The more violations in the MVR, the less hierarchical a group is.
Unfortunately, finding the MVR is very difficult even with a lot of computing power
(in computer science terms, it's NP-hard).
However, several approximation methods exist.
The example below uses SpringRank [BN2013] to find an approximate MVR by modeling individuals as particles connected by springs that pull towards popular individuals and push away from unpopular ones.

In [None]:
# Helper functions 

def scale(x):
    """Standardize x to zero mean and unit standard deviation (z-scores).

    Args:
        x: array-like of numbers.

    Returns:
        ``(x - mean(x)) / std(x)``. When every value is identical the
        standard deviation is zero; the centered values (all zeros) are
        returned instead of dividing by zero.
    """
    centered = x - np.mean(x)
    spread = np.std(x)
    if spread == 0:
        # Constant input: avoid 0/0 -> NaN
        return centered
    return centered / spread

def giant_component(G):
    """Reduce G, in place, to its largest weakly connected component.

    Every node outside the largest weakly connected component is removed
    from G itself; no copy is made. If several components share the largest
    size, the first one produced by networkx is kept. An empty graph is
    returned unchanged.

    Args:
        G: graph accepted by ``weakly_connected_components``.

    Returns:
        The same graph object G, after pruning.
    """
    components = list(nxalg.weakly_connected_components(G))
    if not components:
        # Empty graph: there is no component to keep
        return G
    largest = max(components, key=len)
    G.remove_nodes_from(set(G.nodes()) - largest)
    return G

def get_rank(G):
    """Compute a standardized SpringRank score for every node of G.

    The adjacency matrix is built with rows/columns in ``G.nodes()`` order,
    passed to ``SpringRank`` with ``alpha=0.0, l0=1.0, l1=1.0``, and the
    resulting scores are standardized with ``scale``.

    Args:
        G: networkx graph.

    Returns:
        dict mapping each node to its standardized score.
    """
    nodes = list(G.nodes())
    # nx.to_numpy_matrix was removed in networkx 3.0; wrapping
    # to_numpy_array in np.asmatrix yields the same np.matrix on old and
    # new networkx versions.
    A = np.asmatrix(nx.to_numpy_array(G, nodelist=nodes))
    scores = SpringRank(A, alpha=0.0, l0=1.0, l1=1.0)
    return dict(zip(nodes, scale(scores)))

def plot_ordering(G, rank, spacing=1500):
    """Draw G with every node on one vertical line, ordered by rank.

    Args:
        G: graph to draw.
        rank: dict mapping node -> rank score; must contain every node of G.
        spacing: factor that converts a rank score into a y coordinate.

    Returns:
        The return value of ``visualize_visjs``.
    """
    # x is fixed at 0, so the picture is a single column sorted by rank
    pos = {v: (0, r * spacing) for v, r in rank.items()}
    return visualize_visjs(G, layout=None, pos=pos, scale=500)

In [None]:
# Keep only the largest weakly connected component (G is pruned in place)
G = giant_component(G)
# Calculate the standardized SpringRank score of every remaining node
rank = get_rank(G)
# Plot the ordering: nodes in a single column, positioned by rank
plot_ordering(G, rank)

## References
[BN2013] De Bacco, C., Larremore, D. B., & Moore, C. (2017). A physical model for efficient ranking in networks. arXiv preprint arXiv:1709.09002.

[MA1997] L. Michell, and A. Amos, "Girls, pecking order and smoking." Social Science & Medicine 44(12), 1861-1869 (1997)

[KONECT2017] Facebook wall posts network dataset -- KONECT, April 2017.

[VMCG2009] Bimal Viswanath, Alan Mislove, Meeyoung Cha, and Krishna P. Gummadi. On the evolution of user interaction in Facebook. In Proc. Workshop on Online Social Networks, pages 37--42, 2009.