In [48]:
pip install pyspark



In [49]:
from pyspark import SparkContext
from pyspark import SparkConf

In [50]:
# Build the configuration and actually hand it to the context; previously the
# conf object was created but never used, so the app name was silently dropped.
# getOrCreate() returns an already-running SparkContext if one exists, in which
# case the configuration of that existing context stays in effect.
conf = SparkConf().setAppName("PeopleYouMightKnow")
sc = SparkContext.getOrCreate(conf=conf)

In [51]:
# Load the edge list as an RDD with one element per line of the text file.
# Each line is expected to hold two integer node ids (see parse_node / parse_edge).
# NOTE(review): the path is relative -- presumably Google Drive is mounted under
# the current working directory; confirm before running outside Colab.
edges = sc.textFile("drive/MyDrive/Colab Notebooks/data-Q2/ego-facebook.txt")

def parse_node(edge):
    """Return the (node, node) self-pair for both endpoints of an edge line.

    Args:
        edge: One line of the edge list, holding two integer node ids
            separated by whitespace (spaces or tabs, any amount).

    Returns:
        A 2-tuple ``((a, a), (b, b))`` where ``a`` and ``b`` are the first and
        second node ids on the line. Any extra tokens are ignored.

    Raises:
        IndexError: If the line contains fewer than two tokens.
        ValueError: If a token is not a valid integer.
    """
    # split() without an argument splits on any run of whitespace and drops
    # leading/trailing padding, so tab-separated or padded lines parse too.
    nodes = edge.split()
    first, second = int(nodes[0]), int(nodes[1])
    return (first, first), (second, second)

def parse_edge(edge):
    """Return an edge line as a pair of directed edges, one per direction.

    Args:
        edge: One line of the edge list, holding two integer node ids
            separated by whitespace (spaces or tabs, any amount).

    Returns:
        A 2-tuple ``((a, b), (b, a))`` so that an undirected friendship is
        represented in both directions. Any extra tokens are ignored.

    Raises:
        IndexError: If the line contains fewer than two tokens.
        ValueError: If a token is not a valid integer.
    """
    # split() without an argument splits on any run of whitespace and drops
    # leading/trailing padding, so tab-separated or padded lines parse too.
    nodes = edge.split()
    first, second = int(nodes[0]), int(nodes[1])
    return (first, second), (second, first)

def parse_second_degree_edge(x):
    """Turn one joined record into a candidate pair in both directions.

    Args:
        x: A record whose second element is an indexable pair; only
            ``x[1][0]`` and ``x[1][1]`` are read.

    Returns:
        A 2-tuple ``((u, v), (v, u))`` built from the two values in ``x[1]``.
    """
    left = x[1][0]
    right = x[1][1]
    forward = (left, right)
    backward = (right, left)
    return forward, backward

# (node, node) self-pairs for every node that appears in the edge list.
self_nodes = edges.flatMap(parse_node).distinct()

# Every friendship stored in both directions. Cached because this RDD feeds
# several joins/subtracts; without caching each one re-reads and re-parses
# the input file and repeats the distinct() shuffle.
friendships = edges.flatMap(parse_edge).distinct().cache()

# Joining the friendships on a shared neighbour yields every pair of nodes that
# has at least one common friend (emitted in both directions).
potential_second_degree = (
    friendships.map(lambda x: (x[1], x[0]))
    .join(friendships)
    .flatMap(parse_second_degree_edge)
)

# Keep only genuine second-degree pairs:
#  * drop self-pairs with a narrow filter -- equivalent to subtracting
#    self_nodes (every node here comes from the same edge list) but without
#    an extra shuffle;
#  * de-duplicate before the subtract so less data is shuffled;
#  * remove pairs that are already direct friends.
second_degree_friendships = (
    potential_second_degree
    .filter(lambda pair: pair[0] != pair[1])
    .distinct()
    .subtract(friendships)
)

# Materialised on the driver for membership tests in later cells.
second_degree_friendships_set = set(second_degree_friendships.collect())

In [52]:
def top_10_recommendations(pair):
    """Pick the ten best candidate ids for one user.

    Args:
        pair: ``(user, candidates)`` where ``candidates`` is an iterable of
            ``(candidate_id, score)`` entries.

    Returns:
        ``(user, ids)`` where ``ids`` lists at most ten candidate ids, ordered
        by descending score and, for equal scores, ascending candidate id.
    """
    user, candidates = pair

    def rank(entry):
        # Negate the score so that higher scores sort first; ties fall back
        # to the smaller candidate id.
        return (-entry[1], entry[0])

    ranked = list(candidates)
    ranked.sort(key=rank)

    best_ids = []
    for entry in ranked[:10]:
        best_ids.append(entry[0])
    return (user, best_ids)

# Count common neighbours: joining the friendships on a shared neighbour emits
# one record per (u, v, shared friend); summing the 1s gives the number of
# mutual friends for every ordered pair (u, v).
mutual_friends = (
    friendships.map(lambda x: (x[1], x[0]))
    .join(friendships)
    .map(lambda x: ((x[1][0], x[1][1]), 1))
    .reduceByKey(lambda a, b: a + b)
)

# Keep only pairs that are second-degree connections (not already friends,
# not a node paired with itself).
filtered_mutual_friends = mutual_friends.filter(lambda x: x[0] in second_degree_friendships_set)

# Re-key by user and reduce each user's candidates to their top ten.
recommendations = (
    filtered_mutual_friends
    .map(lambda x: (x[0][0], (x[0][1], x[1])))
    .groupByKey()
    .map(top_10_recommendations)
)

users = [10, 152, 288, 603, 714, 1525, 2434, 2681]

# Fetch all requested users in ONE Spark job. Running filter(...).collect()
# per user recomputes the whole (uncached) lineage once for every user.
requested_users = set(users)
recommendations_by_user = (
    recommendations
    .filter(lambda x: x[0] in requested_users)
    .collectAsMap()
)

for user in users:
    # Users without any second-degree connection print an empty list.
    top_picks = recommendations_by_user.get(user, [])
    print(f"Recommendations for {user}: {top_picks}")


Recommendations for 10: [2, 3, 4, 5, 6, 7, 8, 9, 11, 12]
Recommendations for 152: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Recommendations for 288: [71, 1525, 69, 90, 217, 2348, 2351, 2352, 2354, 2356]
Recommendations for 603: [1, 289, 290, 291, 292, 293, 294, 295, 296, 297]
Recommendations for 714: [1, 712, 713, 715, 717, 718, 1525, 90, 217, 247]
Recommendations for 1525: [288, 1, 710, 714, 603]
Recommendations for 2434: [71, 288, 711, 716, 719, 720, 2348, 2351, 2352, 2354]
Recommendations for 2681: [71, 288, 710, 711, 716, 719, 720, 721, 722, 2348]
