### Load Data

In [29]:
import json
import itertools
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter, defaultdict
%matplotlib inline

# Tally messages per (sender, recipient) pair:
# message_counts[sender][recipient] -> number of messages.
# NOTE(review): assumes each message's 'recipients' is a list of names; a bare
# string would be counted character by character -- confirm against the data.
message_counts = defaultdict(Counter)

with open('../../local_data/graph.json', 'r') as infile:
    data = json.load(infile)
for message in data:
    message_counts[message['sender']].update(message['recipients'])

# A DataFrame built from a dict of dicts has the outer keys (senders) as
# columns and the inner keys (recipients) as the index.
message_matrix = pd.DataFrame(message_counts)

# Transpose before stacking so the stacked index is (sender, recipient);
# stacking the untransposed matrix yields (recipient, sender) and would label
# every edge backwards.  Pairs that never exchanged a message are NaN in the
# matrix; drop them explicitly rather than relying on stack() to do it.
adj_list = message_matrix.T.stack().dropna().reset_index()
adj_list.columns = ['from', 'to', 'count']

# Invert the counts so the most frequent pair gets the smallest weight (0)
# and rarely used pairs get the largest.
adj_list['weight'] = adj_list['count'].max() - adj_list['count']


# Directed graph with one weighted edge per row of the adjacency list.
edge_columns = ['from', 'to', 'weight']
graph = nx.DiGraph()
graph.add_weighted_edges_from(
    adj_list[edge_columns].itertuples(index=False)
)

# Node used as the source of every shortest path (and as the tree root).
centroid = 'Jeff Skilling'

# Mapping of target node -> weighted shortest path (list of nodes) starting
# at the centroid.
treeform = nx.shortest_path(graph, source=centroid, weight='weight')

# One mutable tree record per graph node; children are filled in later.
treenodes = {
    node: {'name': node, 'children': []}
    for node in graph.nodes()
}

def pairwise(iterable):
    """Return an iterator over consecutive overlapping pairs of *iterable*.

    s -> (s0, s1), (s1, s2), (s2, s3), ...

    An input with fewer than two items produces no pairs.
    """
    iterators = itertools.tee(iterable, 2)
    # Advance the second copy by one item so it runs one step ahead of the first.
    next(iterators[1], None)
    return zip(*iterators)

# Build the nested {'name', 'children'} tree by walking every shortest path
# and attaching each node to its predecessor on that path.
#
# Paths share prefixes, so the same (parent, child) edge turns up repeatedly.
# Remembering the edges already linked gives a constant-time duplicate check,
# instead of scanning the children list and comparing node dicts by value.
linked_edges = set()
for path in treeform.values():
    for start, finish in pairwise(path):
        edge = (start, finish)
        if edge in linked_edges:
            continue
        linked_edges.add(edge)
        treenodes[start]['children'].append(treenodes[finish])

# Write the tree rooted at the centroid as pretty-printed JSON.
with open('data.json', 'w') as outfile:
    json.dump(treenodes[centroid], outfile, sort_keys=True, indent=4, separators=(',', ': '))
print('done')
        

done


### Function to rank an adjacency list based on a given alpha