In [67]:

# Build the directed graph from the edge list as an adjacency map:
# node -> set of successor nodes. Both endpoints of every edge get an entry,
# so nodes without outgoing edges map to an empty set.
graph = {}
with open('v2v.txt', 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a stray trailing newline) carry no edge.
            continue
        u, v = map(int, fields)
        # Register u before v to keep the node insertion order stable.
        graph.setdefault(u, set()).add(v)
        graph.setdefault(v, set())


In [68]:
# Dump the whole adjacency map to inspect the load (the output is very long).
print(graph)

{0: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 1: {3328, 3329, 1091, 3082, 3084, 1517, 310, 1213, 3326, 3327}, 2: set(), 3: {3472}, 4: {3094}, 5: {3460, 1465, 269, 3151, 2649, 2648, 761}, 6: {1066, 2251, 2252, 2256, 2257, 1815, 1978, 3324, 3325, 1247}, 7: set(), 8: set(), 9: {3330, 3331, 3332, 3333, 1576, 10, 1009, 1914, 3259}, 10: {807, 808, 809, 810, 811, 748, 812, 813, 814, 731}, 310: set(), 1091: {322, 68, 1093, 1094, 1092}, 1213: {962, 867, 3205, 1927, 2186, 1743, 2583, 1595, 573, 223}, 1517: set(), 3082: set(), 3084: set(), 3326: set(), 3327: {5313, 3558, 1098, 1867, 1070, 4338, 4757, 5624, 5625, 2910}, 3328: {737, 1889, 485, 2919, 1130, 1675, 2095, 5170, 1301, 4118}, 3329: {3785, 5693}, 3472: set(), 3094: set(), 269: set(), 761: set(), 1465: set(), 2648: set(), 2649: set(), 3151: set(), 3460: set(), 1066: set(), 1247: {1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1240}, 1815: {1122, 1673, 501, 1816, 1817, 1818, 1819, 1820, 1821}, 1978: {1829}, 2251: {659}, 2252: set(), 2256: {393

# **calculate diameter**

In [69]:
from collections import deque

def bfs(graph, start):
    """Return the largest shortest-path distance from `start` to any reachable node.

    Parameters
    ----------
    graph : dict
        Adjacency map, node -> iterable of successor nodes. A node that has
        no entry is treated as having no successors.
    start : hashable
        Node the breadth-first search starts from.

    Returns
    -------
    int
        Number of edges on the longest shortest path starting at `start`
        (0 when nothing else is reachable).
    """
    # Mark nodes when they are enqueued, not when they are dequeued, so each
    # node enters the queue at most once instead of once per incoming edge.
    visited = {start}
    queue = deque([(start, 0)])
    max_dist = 0

    while queue:
        node, dist = queue.popleft()
        max_dist = max(max_dist, dist)
        for neighbor in graph.get(node, ()):
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append((neighbor, dist + 1))

    return max_dist

def diameter(graph):
    """Return the largest value `bfs` reports over all nodes of `graph`.

    One breadth-first search is started from every key of the adjacency map;
    the result is the maximum of those per-node values, or 0 for an empty
    graph.
    """
    # Seed the candidates with 0 so an empty graph still yields a result.
    per_node = [0] + [bfs(graph, source) for source in graph]
    return max(per_node)


In [70]:
# Runs one BFS per node of the graph and keeps the largest distance found.
dimeter_graph = diameter(graph)

print("Diameter of the graph:", dimeter_graph)

Diameter of the graph: 22


In [71]:
# Depth used below as the default `max_depth` of calculate_similarity:
# the diameter integer-divided by 4.
# NOTE(review): the divisor 4 is not explained anywhere visible - confirm the rationale.
num_iteration = dimeter_graph // 4

In [72]:
# Show the depth derived from the diameter.
print(num_iteration)

5


In [73]:
# Build the reversed graph from the same edge list:
# node -> set of predecessor nodes (i.e. its in-neighbours).
# Both endpoints of every edge get an entry, so nodes without incoming edges
# map to an empty set.
reverse_graph = {}

with open('v2v.txt', 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a stray trailing newline) carry no edge.
            continue
        u, v = map(int, fields)
        # Register u before v to keep the node insertion order stable.
        reverse_graph.setdefault(u, set())
        reverse_graph.setdefault(v, set()).add(u)  # edge u -> v is stored as v -> u

In [74]:
# Dump the whole reversed adjacency map to inspect it (the output is very long).
print(reverse_graph)

{0: {5762, 3259, 5734}, 1: {0, 8131, 3820, 207, 3444, 1974, 1878, 1976}, 2: {0}, 3: {0, 3273, 4882, 2453}, 4: {0}, 5: {0, 609, 2728, 402, 819, 308, 1145}, 6: {0, 2610}, 7: {0, 2433, 1666, 3719, 5498, 1940, 154, 7965, 57, 4030, 4544, 1242, 2525, 3044, 5232, 6513, 6899, 7930, 2430}, 8: {0, 833, 166, 231, 1356, 1199, 83}, 9: {0, 491, 3564, 6160, 7314, 403, 694}, 10: {0, 8288, 9, 5232, 4918, 6710}, 310: {1, 306, 1217}, 1091: {1, 8267, 4133, 1982}, 1213: {3784, 1, 6944, 1212}, 1517: {1, 1217, 3558, 1512, 4776, 4970, 819, 724, 4565, 1783, 1305}, 3082: {1, 1217, 2673, 1305, 5407}, 3084: {1, 2018, 195, 1217, 4267, 3564, 3820, 4365, 5165, 6957, 8558, 3122, 7319, 7960, 8027}, 3326: {1}, 3327: {1, 8679, 430, 4631}, 3328: {1, 6795}, 3329: {1, 2754, 2378, 2414, 1497}, 3472: {3, 1372, 8508}, 3094: {4, 7909, 7273, 1970, 3093, 4986}, 269: {261, 5, 872, 1777, 6994, 4564, 56}, 761: {416, 5, 4041, 8684, 754, 1330, 7763, 3036}, 1465: {6432, 4481, 5, 5926, 2055, 4938, 6956, 8492, 5007, 3122, 1459, 3285, 31

# **calculate similarity**

In [75]:
# Cache of previously calculated similarities, shared by all calls.
# Keys are (id(graph), a, b, c, depth) so that scores computed for a different
# graph object or decay factor are never returned by mistake.
memo = {}

def calculate_similarity(graph, a, b, c=0.5, max_depth=num_iteration):
    """Recursively score the similarity of nodes `a` and `b`.

    Parameters
    ----------
    graph : dict
        Map node -> collection of neighbours used for the recursion. Callers
        in this notebook pass the reversed graph, so these are in-neighbours.
    a, b : hashable
        The two nodes to compare; both must be keys of `graph`.
    c : float
        Decay factor applied at every recursion level.
    max_depth : int
        Remaining recursion depth. The default is the value `num_iteration`
        had when this function was defined.

    Returns
    -------
    int or float
        At depth 0 (or below): 1 if `a == b`, else 0. Otherwise 0 when either
        node has no neighbours, else
        ``c / mean(len(Ia), len(Ib)) * sum(similarity(i, j, depth - 1))``
        over all neighbour pairs (i, j).

    Notes
    -----
    Results are cached in the module-level `memo`; the cache assumes `graph`
    is not mutated between calls.

    NOTE(review): textbook SimRank normalises by ``len(Ia) * len(Ib)`` and
    scores a node against itself as 1 at every depth. This implementation
    divides by the mean neighbour count and only special-cases ``a == b`` at
    depth 0 - confirm that this is the intended definition.
    """
    key = (id(graph), a, b, c, max_depth)
    if key in memo:
        return memo[key]

    Ia = graph[a]
    Ib = graph[b]

    # Base case; `<=` also stops a negative depth from recursing forever.
    if max_depth <= 0:
        return 1 if a == b else 0

    # A node without neighbours has nothing to be compared through.
    if not Ia or not Ib:
        return 0

    total = sum(
        calculate_similarity(graph, i, j, c, max_depth - 1)
        for i in Ia
        for j in Ib
    )

    result = c / ((len(Ia) + len(Ib)) / 2) * total
    memo[key] = result
    return result



In [76]:
from tqdm import tqdm

def find_most_similar(graph, specified_nodes, num_results=10):
    """Rank the neighbours of each requested node by similarity score.

    For every node in `specified_nodes`, each entry of `graph[node]` is scored
    against the node with `calculate_similarity` (c=0.5, depth `num_iteration`).
    Only those direct neighbours are candidates; other nodes are never scored.

    Returns a dict mapping each requested node to a list of at most
    `num_results` ``(neighbour, score)`` pairs, highest score first.
    """
    ranking_by_node = {}

    for query in tqdm(specified_nodes):
        # Score every direct neighbour of the query node.
        scores = {
            candidate: calculate_similarity(graph, query, candidate, c=0.5, max_depth=num_iteration)
            for candidate in graph[query]
        }

        # Highest score first; the stable sort keeps ties in iteration order.
        ranked = list(scores.items())
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        ranking_by_node[query] = ranked[:num_results]

    return ranking_by_node



In [77]:
# Query nodes whose neighbours are to be ranked by similarity.
nodes = [688, 387, 277, 876, 999, 1777, 6319]
# The reversed graph is passed so that graph[x] yields the in-neighbours of x.
top_10_similar = find_most_similar(reverse_graph, nodes)



100%|██████████| 7/7 [07:09<00:00, 61.37s/it]


In [78]:
# One line per query node: the ids of its ranked neighbours, each followed by
# a space, then an empty line as separator.
for node, similar_nodes in top_10_similar.items():
    id_list = "".join(f"{candidate} " for candidate, _score in similar_nodes)
    print(f"Top 10 similar nodes to node {node}: " + id_list + "\n")

Top 10 similar nodes to node 688: 226 229 854 5299 5008 621 4865 3077 1648 5824 

Top 10 similar nodes to node 387: 135 1156 5279 2459 1417 2748 4179 4117 80 3845 

Top 10 similar nodes to node 277: 854 350 2545 1744 2198 1002 1882 614 1399 1926 

Top 10 similar nodes to node 876: 854 2545 304 135 1648 4219 4770 3251 4072 1015 

Top 10 similar nodes to node 999: 391 385 2209 3785 135 206 1926 1587 5630 2748 

Top 10 similar nodes to node 1777: 2209 2545 5008 1251 719 4675 3721 4153 3507 2808 

Top 10 similar nodes to node 6319: 4950 4551 8281 

