In [20]:
import numpy as np
import pandas as pd
import networkx as nx
from random import random

In [7]:
graph = nx.read_edgelist('./data/facebook_combined.txt', create_using=nx.Graph(), nodetype=int)

### Question 16

In [22]:
target_node = 414
personalized_network = nx.ego_graph(graph, target_node, radius=1)
users = []

for neighbor in personalized_network.neighbors(target_node):
    if personalized_network.degree(neighbor) == 24:
        users.append(neighbor)

print(len(users))

11


### Question 17

In [49]:
def common_neighbors(graph, node1, node2):
    neighbors1 = set(graph.neighbors(node1))
    neighbors2 = set(graph.neighbors(node2))
    return len(neighbors1 & neighbors2)

def jaccard_coefficient(graph, node1, node2):
    neighbors1 = set(graph.neighbors(node1))
    neighbors2 = set(graph.neighbors(node2))
    return len(neighbors1 & neighbors2) / len(neighbors1 | neighbors2)

def adamic_adar(graph, node1, node2):
    neighbors1 = set(graph.neighbors(node1))
    neighbors2 = set(graph.neighbors(node2))
    common_neighbors = neighbors1 & neighbors2
    return sum(1 / np.log(graph.degree(neighbor)) for neighbor in common_neighbors)

def calc_accuracy(graph, node, new_friend_node):
    node_neighbors = set(graph.neighbors(node))
    friend_neighbors = set(graph.neighbors(new_friend_node))
    return len(node_neighbors & friend_neighbors) / len(node_neighbors)

In [89]:
def avg_acc_friend_recc(n_iters, friend_metric_func, users):
    accuracy1 = 0
    for user_i in users:
        accuracy2 = 0
        for _ in range(n_iters):
            neighborhood = personalized_network.neighbors(user_i)
            n = personalized_network.degree(user_i)
            n_users_deleted = sum(np.random.rand(n) < 0.25)
        
            friend_nodes = []
            friend_scores = []
            for node in personalized_network.nodes():
                if node not in neighborhood:
                    friend_nodes.append(node)
                    friend_scores.append(friend_metric_func(personalized_network, user_i, node))

            inds = np.argsort(friend_scores)[::-1][:n_users_deleted]
            accuracy3 = 0
            for node in np.array(friend_nodes)[inds]:
                accuracy3 += calc_accuracy(personalized_network, user_i, node)
            accuracy2 += accuracy3 / n_users_deleted
        accuracy1 += accuracy2 / n_iters
    return accuracy1 / len(users)

print(avg_acc_friend_recc(10, common_neighbors, users))
print(avg_acc_friend_recc(10, jaccard_coefficient, users))
print(avg_acc_friend_recc(10, adamic_adar, users))

0.9468760521885522
0.9322657309020946
0.9439814163109617
