In [138]:
import json
from collections import deque

# Collect Data

Get your data from https://lostcircles.com/. Download the JSON (no pics) export after the site has finished loading your network, then run the following function to build an adjacency list. Note that some edges in the JSON file are recorded in only one direction, so we add both directions to the adjacency list.

In [139]:
# Relative path of the Lost Circles JSON export that process_json reads.
path = 'fb_network.json'

def process_json(path):
    """Parse a Lost Circles JSON export into an adjacency list and a node list.

    Parameters
    ----------
    path : str
        Location of a JSON file with a top-level "links" list (each entry
        has "source" and "target") and a top-level "nodes" list.

    Returns
    -------
    (dict, list)
        ``adj_list`` maps every node that appears in at least one link to the
        list of its neighbours, without duplicates and in first-seen order.
        ``nodes`` is the "nodes" list exactly as stored in the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If "links", "nodes", "source" or "target" is missing.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and read as UTF-8 (the JSON interchange encoding) rather than the
    # platform default.
    with open(path, encoding="utf-8") as handle:
        data = json.load(handle)

    links = data["links"]
    nodes = data["nodes"]

    adj_list = {}
    for edge in links:
        source, target = edge["source"], edge["target"]
        # The export may record an edge in only one direction, so register it
        # from both endpoints; the membership test keeps lists duplicate-free.
        for here, there in ((source, target), (target, source)):
            neighbours = adj_list.setdefault(here, [])
            if there not in neighbours:
                neighbours.append(there)

    return adj_list, nodes

# Build the adjacency list and keep the raw node records for index lookups.
adj_list, nodes = process_json(path)

# Tie Strength

In [140]:
# c = 0
# for i in nodes:
#     if i['name'] == 'Andrew Cui':
#         print(c)
#     c += 1

## The commented-out loop above prints the index in `nodes` of the user with the given name.

In [141]:
def common_neighbor(adj_list, node1, node2):
    """Return the neighbours shared by node1 and node2.

    Parameters
    ----------
    adj_list : dict
        Maps each node to the list of its neighbours.
    node1, node2 : hashable
        Keys of ``adj_list``.

    Returns
    -------
    list
        The entries of ``adj_list[node1]`` that also occur in
        ``adj_list[node2]``, in the order they appear in ``adj_list[node1]``.

    Raises
    ------
    KeyError
        If either node is not a key of ``adj_list``.
    """
    first = adj_list[node1]
    # Build the lookup set once so every membership test is O(1) instead of
    # a linear scan of node2's neighbour list.
    second = set(adj_list[node2])
    return [nei for nei in first if nei in second]


def dispersion(adj_list, node1, node2, threshold = 1, normalized = False):
    """Calculate the dispersion of the tie between node1 and node2.

    Dispersion is the number of pairs of common neighbours of node1 and node2
    whose distance is greater than ``threshold``, where the distance is
    measured without passing through node1 or node2.

    Parameters
    ----------
    adj_list : dict
        Maps each node to the list of its neighbours.
    node1, node2 : hashable
        End points of the tie; both must be keys of ``adj_list``.
    threshold : int, optional
        Pairs at distance <= threshold do not count. The default of 1 counts
        every pair that is not directly linked.
    normalized : bool, optional
        When true, divide the count by the number of common neighbours
        (0 is returned when there are fewer than two of them).

    Returns
    -------
    int or float
        The raw count, or the normalized value when ``normalized`` is true.
    """
    endpoints = (node1, node2)
    # With self-loops an end point can show up among its own common
    # neighbours; it is not a mutual friend, so leave it out.
    common_nei = [
        nei for nei in common_neighbor(adj_list, node1, node2)
        if nei != node1 and nei != node2
    ]

    result = 0
    for i in range(len(common_nei) - 1):
        for j in range(i + 1, len(common_nei)):
            # Every common neighbour is adjacent to both end points, so paths
            # through them must be ignored; otherwise any threshold >= 2
            # would make every pair "close" and the dispersion always 0.
            result += distance(threshold, adj_list, common_nei[i], common_nei[j],
                               excluded=endpoints)

    if normalized:
        if len(common_nei) <= 1:
            return 0
        return result/len(common_nei)

    return result

def distance(threshold, adj_list, u, v, excluded=()):
    """Tell whether v is farther than ``threshold`` hops from u.

    Runs a breadth-first search from u, expanding at most ``threshold``
    layers.

    Parameters
    ----------
    threshold : int
        Maximum number of hops to explore.
    adj_list : dict
        Maps each node to the list of its neighbours.
    u, v : hashable
        Start and target node.
    excluded : iterable, optional
        Nodes the search must not visit or walk through. Defaults to none,
        which reproduces a plain BFS. u and v should not be listed here.

    Returns
    -------
    int
        0 when v is within ``threshold`` hops of u, 1 otherwise.
    """
    if u == v:
        # A node is at distance 0 from itself.
        return 0 if threshold >= 0 else 1

    blocked = set(excluded)
    queue = deque([u])  # BFS frontier
    explored = {u}      # set for O(1) "already seen" checks
    depth = 0
    while queue and depth < threshold:
        # Drain exactly one BFS layer so `depth` counts hops from u.
        for _ in range(len(queue)):
            cur = queue.popleft()
            for nei in adj_list[cur]:
                if nei in blocked:
                    continue
                if nei == v:
                    return 0
                if nei not in explored:
                    explored.add(nei)
                    queue.append(nei)
        depth += 1
    return 1

In [142]:
# This is the graph in the slide, used for testing.
# It is undirected: every edge is listed from both end points and no node
# lists itself ('d' lists 'b' because 'b' lists 'd'; 'k' lists 'h' because
# 'h' lists 'k').
adj = {}
adj['a'] = ['b', 'c', 'u']
adj['b'] = ['a', 'c', 'd', 'e', 'f', 'u']
adj['c'] = ['a', 'b', 'd', 'f', 'h', 'u']
adj['d'] = ['b', 'c', 'f', 'u']
adj['e'] = ['b', 'f', 'u']
adj['f'] = ['c', 'd', 'b', 'e', 'h', 'u']
adj['g'] = ['u']
adj['h'] = ['c', 'f', 'k', 'j', 'u']
adj['i'] = ['k', 'j', 'u']
adj['j'] = ['h', 'u', 'k', 'i']
adj['k'] = ['h', 'i', 'j', 'u']
adj['u'] = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']

In [143]:
# 'a' and 'b' are directly linked in the test graph, so they are within 1 hop.
distance(1, adj, 'a', 'b')

0

In [144]:
# Raw dispersion of the h-u tie in the test graph (default threshold 1).
dispersion(adj, 'h', 'u')

4

In [145]:
# Same tie, normalized by the number of common neighbours of 'h' and 'u'.
dispersion(adj, 'h', 'u', 1, True)

1.0

In [146]:
# Inspect the node record stored at index 11.
nodes[11]

{'dataUrl': '',
 'id': '100009517843209',
 'name': 'Andrew Cui',
 'profile': 'https://www.facebook.com/andrew.cui.75?fref=pb&hc_location=friends_tab',
 'userName': 'andrew.cui.75'}

In [147]:
# Inspect the node record stored at index 30.
nodes[30]

{'dataUrl': '',
 'id': '100004068944907',
 'name': 'Ayoub Belemlih',
 'profile': 'https://www.facebook.com/belemlih.ayoub?fref=pb&hc_location=friends_tab',
 'userName': 'belemlih.ayoub'}

In [148]:
# Raw dispersion between nodes 11 and 30 of the loaded network (default threshold 1).
dispersion(adj_list, 11, 30)

155

In [149]:
def recursive_dispersion(adj_list, node, max_iterations = 1, threshold = 1):
    """Iteratively score every other node's tie to ``node``.

    Each node other than ``node`` starts with the score 1. In every round a
    new score is computed for each of them by ``helper`` from the scores of
    the previous round and appended to that node's history.

    Parameters
    ----------
    adj_list : dict
        Maps each node to the list of its neighbours.
    node : hashable
        The node whose ties are being scored.
    max_iterations : int, optional
        Number of rounds to run.
    threshold : int, optional
        Distance threshold forwarded to ``helper``.

    Returns
    -------
    dict
        ``{other_node: [1, score_after_round_1, score_after_round_2, ...]}``.
    """
    # Round 0: every candidate starts from the same score.
    values = {other: [1] for other in adj_list if other != node}

    round_index = 0
    while round_index < max_iterations:
        for other in values:
            # helper only reads index `round_index`, so appending while
            # looping does not disturb the scores used within this round.
            updated = helper(adj_list, node, other, values, round_index, threshold)
            values[other].append(updated)
        round_index += 1

    return values


def helper(adj_list, u, v, values, iteration_num, threshold):
    """Compute one recursive-dispersion update for the tie between u and v.

    With C the common neighbours of u and v, x_n = values[n][iteration_num]
    and d(s, t) the 0/1 result of ``distance``, the value returned is

        (sum of x_w**2 for w in C  +  2 * sum of d(s, t) * x_s * x_t
         over pairs s, t in C) / len(C)

    Parameters
    ----------
    adj_list : dict
        Maps each node to the list of its neighbours.
    u, v : hashable
        End points of the tie.
    values : dict
        Score history per node; index ``iteration_num`` is read.
    iteration_num : int
        Which round's scores to use.
    threshold : int
        Distance threshold passed to ``distance``.

    Returns
    -------
    int or float
        0 when u and v have fewer than two common neighbours, otherwise the
        value of the formula above.
        NOTE(review): a single common neighbour yields 0 rather than x_w**2;
        confirm this is the intended convention.
    """
    # With self-loops an end point can appear among its own common
    # neighbours; it is not a mutual friend (and u has no entry in `values`).
    common_nei = [w for w in common_neighbor(adj_list, u, v) if w != u and w != v]
    if len(common_nei) <= 1:
        return 0

    if threshold > 1:
        # Every common neighbour is adjacent to both u and v, so a search
        # allowed to walk through them finds every pair within two hops and
        # the dispersion term vanishes. Measure distances on the graph with
        # u and v removed instead. (For threshold <= 1 only direct links are
        # inspected, so the original graph gives the same answer.)
        endpoints = (u, v)
        graph = {
            n: [m for m in nbrs if m not in endpoints]
            for n, nbrs in adj_list.items()
            if n not in endpoints
        }
    else:
        graph = adj_list

    weights = [values[w][iteration_num] for w in common_nei]

    result = 0
    for x in weights:
        result += x * x

    for i in range(len(common_nei) - 1):
        for j in range(i + 1, len(common_nei)):
            result += 2 * distance(threshold, graph, common_nei[i], common_nei[j]) * weights[i] * weights[j]

    return result/len(common_nei)

In [150]:
# Score every tie of node 11: 3 iterations, distance threshold 1.
dispersions = recursive_dispersion(adj_list, 11, 3, 1)

In [151]:
def find_max_dispersion(dispersions):
    """Return the key whose most recent dispersion value is the largest.

    Parameters
    ----------
    dispersions : dict
        Maps each node to its list of dispersion values; only the last
        entry of each list is compared.

    Returns
    -------
    The key with the largest last value. Ties go to the key seen first.
    When no last value is greater than 0 (or the dict is empty) the
    fallback 0 is returned.
    """
    best_key, best_score = 0, 0
    for key, history in dispersions.items():
        latest = history[-1]  # value from the final iteration
        if latest > best_score:
            best_key, best_score = key, latest
    return best_key

In [152]:
# Key with the largest final-iteration score.
find_max_dispersion(dispersions)

45

In [153]:
# Inspect the node record stored at index 45.
nodes[45]

{'dataUrl': '',
 'id': '100001107732647',
 'name': 'Jongwon Han',
 'profile': 'https://www.facebook.com/hans.jongwon?fref=pb&hc_location=friends_tab',
 'userName': 'hans.jongwon'}