In [1]:
import os
import json
import collections

In [2]:
# Directory holding the input files. An empty string means "the current
# working directory": os.path.join('', fname) yields the bare relative filename.
DATA_DIR = ''
# os.path.isdir('') is always False, so an empty DATA_DIR is checked as
# os.curdir instead of failing unconditionally on a fresh Run All.
assert os.path.isdir(DATA_DIR or os.curdir), f"data directory not found: {DATA_DIR!r}"

fname = 'links.json'
fpath = os.path.join(DATA_DIR, fname)
# Fail early, with the offending path in the message, if the input is missing.
assert os.path.isfile(fpath), f"input file not found: {fpath!r}"

In [3]:
# Parse the links file into Python objects. Decode explicitly as UTF-8 (the
# standard JSON interchange encoding) rather than relying on the platform's
# locale-dependent default.
with open(fpath, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [4]:
def create_node_dict(data):
    """Build a per-user attribute lookup from the raw entity records.

    Records that share a ``user_id`` are merged: every non-``None`` value seen
    for a field is collected in input order, then collapsed to one value per
    field.

    Args:
        data: Iterable of dict-like records. Each record is read with ``.get``
            for ``user_id``, ``name``, ``email`` and ``start_date``.

    Returns:
        dict keyed by ``user_id`` (``None`` for records without that key).
        Each value is a dict containing only the fields that had at least one
        collected value:

        * ``user_id`` / ``name`` / ``email``: all collected values converted
          to ``str`` and joined with ``','`` (repeated values are kept).
        * ``start_date``: the smallest collected value, as given by ``min``.
    """
    attr_fields = ['user_id', 'name', 'email', 'start_date']
    delimiter = ','

    # Pass 1: gather every non-None value per field, grouped by node id.
    node_data = {}
    for entity in data:
        attrs = node_data.setdefault(
            entity.get('user_id'),
            {field: [] for field in attr_fields},
        )
        for field in attr_fields:
            new_value = entity.get(field)
            if new_value is not None:
                attrs[field].append(new_value)

    # Pass 2: collapse each list of collected values into a single value.
    node_lookup = {}
    for node_id, attrs in node_data.items():
        rec = {}
        for field in attr_fields:
            values = attrs[field]
            if not values:
                # Nothing collected: omit the field. This also avoids calling
                # min() on an empty list, which raises ValueError.
                continue
            if field == 'start_date':
                rec[field] = min(values)
            else:
                # str() keeps the join from failing on non-string values
                # (e.g. an integer user_id); it is a no-op for strings.
                rec[field] = delimiter.join(str(value) for value in values)
        node_lookup[node_id] = rec

    return node_lookup

In [5]:
def create_edge_table(data):
    """Flatten every record's ``shared_data`` entries into a list of edges.

    Args:
        data: Iterable of dict-like records. ``user_id`` is read as the edge
            source and ``shared_data`` as the collection of linked entries.

    Returns:
        list of dicts with keys ``source``, ``target`` and ``data``. One edge
        is produced per ``shared_data`` entry whose ``user_id`` is not
        ``None``. ``data`` holds ``method``, ``value``,
        ``first_activity_date`` and ``last_activity_date`` copied from the
        entry, with the string ``'None'`` standing in for any absent key.
    """
    edge_fields = ('method', 'value', 'first_activity_date', 'last_activity_date')
    missing = 'None'

    rows = []
    for record in data:
        origin = record.get('user_id')
        # A missing or falsy shared_data contributes no edges.
        for link in record.get('shared_data', []) or []:
            destination = link.get('user_id')
            if destination is None:
                # Entries without a target user cannot form an edge.
                continue
            rows.append({
                'source': origin,
                'target': destination,
                'data': {name: link.get(name, missing) for name in edge_fields},
            })
    return rows

In [6]:
# Flatten the loaded records into one edge dict per shared_data entry that
# names a target user.
edges = create_edge_table(data)

In [7]:
# Record, for every node id, each role ('source' / 'target') it plays across
# the edge list, in edge order.
labeling_dict = collections.defaultdict(list)
for edge in edges:
    labeling_dict[edge['source']].append('source')
    labeling_dict[edge['target']].append('target')

# Color by the set of roles a node plays; anything unrecognised falls back to gray.
color_map = {'source & target': 'red', 'source': 'yellow', 'target': 'gray'}

color_dict = {}
for node_id, roles in labeling_dict.items():
    # Distinct roles, alphabetically ordered, so that a node seen in both
    # roles always produces the key 'source & target'.
    role_key = ' & '.join(sorted(set(roles)))
    color_dict[node_id] = color_map.get(role_key, 'gray')

In [8]:
def apply_node_attributes(
    edges,
    node_dict=None,
    color_dict=None,
    labels=('source', 'target')
):
    """Annotate each edge with the attributes and color of its endpoints.

    For every label in ``labels`` the edge gains two keys:
    ``"<label>_attrs"``, looked up in ``node_dict`` by ``edge[label]``
    (``{}`` when absent), and ``"<label>_color"``, looked up in
    ``color_dict`` (``'gray'`` when absent).

    Note:
        The edge dicts are updated in place; the returned list contains the
        same dict objects that were passed in.

    Args:
        edges: Iterable of edge dicts that contain every key in ``labels``.
        node_dict: Mapping of node id to attribute dict. ``None`` (the
            default) is treated as an empty mapping.
        color_dict: Mapping of node id to color. ``None`` (the default) is
            treated as an empty mapping.
        labels: Edge keys that hold node ids.

    Returns:
        list of the annotated edge dicts, in input order.
    """
    # None sentinels replace the former mutable ``{}`` defaults, which were
    # single dict objects shared by every call.
    node_dict = {} if node_dict is None else node_dict
    color_dict = {} if color_dict is None else color_dict

    table = []
    for edge in edges:
        for label in labels:
            node_id = edge[label]
            edge[f"{label}_attrs"] = node_dict.get(node_id, {})
            edge[f"{label}_color"] = color_dict.get(node_id, 'gray')
        table.append(edge)
    return table

In [9]:
def dedupe_edges(edges):
    """Drop repeated edges, treating direction as irrelevant.

    Two edges count as the same when they join the same pair of nodes (in
    either order) and the text ``"<method> | <value>"`` built from their
    ``data`` matches. The first occurrence is kept.

    Args:
        edges: Iterable of edge dicts with ``source``, ``target`` and a
            ``data`` dict containing ``method`` and ``value``.

    Returns:
        list of the surviving edge dicts, in input order.
    """
    seen = set()
    unique = []
    for edge in edges:
        payload = edge['data']
        signature = "{} | {}".format(payload['method'], payload['value'])
        forward = (edge['source'], edge['target'], signature)
        if forward in seen:
            continue
        # Register both orientations so the mirrored edge is caught later.
        backward = (edge['target'], edge['source'], signature)
        seen |= {forward, backward}
        unique.append(edge)
    return unique

In [10]:
# Collapse repeated/mirrored links first, then decorate the surviving edges
# with each endpoint's attributes and display color.
edge_table = apply_node_attributes(
    dedupe_edges(edges),
    node_dict=create_node_dict(data),
    color_dict=color_dict,
)