In [26]:
from itertools import combinations
import numpy as np
import pandas as pd
import random
import networkx as nx

In [27]:
def compute_source_target(df):
    """Build the edge list that links every pair of distinct users.

    Parameters
    ----------
    df : pandas.DataFrame
        Table exposing a ``user_id`` column; no other column is read.

    Returns
    -------
    list of tuple
        One ``(user_a, user_b)`` tuple per unordered pair of distinct
        ``user_id`` values.  Users are taken in the order returned by
        ``unique()``, and each pair lists the earlier user first.  The list
        is empty when ``df`` holds fewer than two distinct users.
    """
    unique_users = df["user_id"].unique()

    # No weight or rating is involved: every pair of users is connected, so
    # the pairs are exactly the 2-combinations of the unique ids, produced in
    # the same order as an index-based double loop (i < j) would give.
    return list(combinations(unique_users, 2))

In [28]:
def k_cliques(graph):
    """Lazily enumerate the cliques of ``graph``, one size at a time.

    Starts from the edges (2-cliques) and repeatedly merges two k-cliques
    that differ in exactly one node each into a (k+1)-clique, provided the
    two differing nodes are themselves joined by an edge.

    Parameters
    ----------
    graph
        Object exposing ``edges()`` (an iterable of node pairs) and
        ``has_edge(u, v)``, such as a ``networkx.Graph``.  Nodes must be
        hashable.

    Yields
    ------
    tuple of (int, list of set)
        ``(k, cliques)`` for k = 2, 3, ... where ``cliques`` lists each
        distinct k-clique once.  Iteration ends after the largest size for
        which at least one clique exists; nothing is yielded for a graph
        without (non-self-loop) edges.
    """
    # 2-cliques: one per edge, self-loops ignored.  Keying a dict by
    # frozenset collapses repeated or reversed edges and keeps edge order.
    cliques = [
        set(pair)
        for pair in dict.fromkeys(
            frozenset((i, j)) for i, j in graph.edges() if i != j
        )
    ]
    k = 2

    while cliques:
        yield k, cliques

        # Merge k-cliques into (k+1)-cliques.  Candidates are keyed by
        # frozenset rather than tuple(set): the iteration order of a set is
        # not canonical, so two equal cliques could produce different tuples
        # and slip through the de-duplication.
        merged = {}
        for u, v in combinations(cliques, 2):
            w = u ^ v  # nodes that belong to exactly one of the two cliques
            if len(w) == 2 and graph.has_edge(*w):
                merged[frozenset(u | w)] = None

        # Back to plain sets, in first-discovery order.
        cliques = [set(clique) for clique in merged]
        k += 1

In [29]:
def print_cliques(G, size_k):
    """Print all cliques of ``G`` that contain exactly ``size_k`` nodes.

    Writes a single line holding the size, the number of cliques and the
    cliques themselves.  Nothing is printed when ``G`` has no clique of the
    requested size.

    Parameters
    ----------
    G
        Graph accepted by ``k_cliques``.
    size_k : int
        Number of nodes each reported clique must have.
    """
    for k, cliques in k_cliques(G):
        if k < size_k:
            continue
        if k == size_k:
            print('%d-cliques = %d, %s.' % (k, len(cliques), cliques))
        # k_cliques yields each size once, in increasing order, and builds the
        # next level only when asked for it; stopping here skips the work of
        # constructing every larger clique level.
        break

In [30]:
# Load the ratings table into `df`.
# NOTE(review): this is a user-specific absolute Windows/OneDrive path, so the
# cell will presumably fail on any other machine; consider a path relative to
# the notebook or a configurable data directory.
# NOTE(review): the recorded preview shows an "Unnamed: 0" column, which
# usually means the CSV was written with its index; confirm whether that
# column is wanted.
df = pd.read_csv("C:/Users/UYEN/OneDrive - Trường ĐH CNTT - University of Information Technology/Đại Học/Năm 4/Làm nhóm HK2 2022-2023/Mạng xã hội/FinalProject/RatingCSV.csv")
# Preview the first five rows.
df.head(5)

Unnamed: 0,user_id,movie_id,rating
0,1,101,5
1,1,102,3
2,1,103,3
3,1,104,3
4,1,105,2


In [31]:
# Distinct user ids found in the ratings table and how many there are.
# `num_users` is later used as the upper bound for the random clique size.
unique_users = df.user_id.unique()
num_users = len(unique_users)
# Display the user count as the cell output.
num_users

7

In [32]:
# Edge list with one (user, user) pair for every two distinct users; ratings
# are not consulted, so every user ends up linked to every other user.
source_target = compute_source_target(df)
# Display the full edge list as the cell output.
source_target

[(1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (4, 5),
 (4, 6),
 (4, 7),
 (5, 6),
 (5, 7),
 (6, 7)]

In [33]:
# Build an undirected graph whose edges are the user pairs computed above.
G = nx.Graph()
G.add_edges_from(source_target)

In [36]:
# Pick a clique size between 2 and num_users (both ends included) and print
# every clique of that size found in G.
# NOTE(review): no seed is set in the visible cells, so the chosen size (and
# therefore the output) changes on every run.
size_k = random.randint(2, num_users)
print(f"size_k: {size_k}")
print_cliques(G, size_k)

size_k: 7
7-cliques = 1, [{1, 2, 3, 4, 5, 6, 7}].


In [37]:
# Same experiment as the previous cell: draw another clique size in
# [2, num_users] and print the cliques of that size.
# NOTE(review): this cell is a verbatim copy; looping over
# range(2, num_users + 1) would cover every size reproducibly.
size_k = random.randint(2, num_users)
print(f"size_k: {size_k}")
print_cliques(G, size_k)

size_k: 6
6-cliques = 7, [{1, 2, 3, 4, 5, 7}, {1, 2, 3, 4, 6, 7}, {1, 2, 3, 5, 6, 7}, {2, 3, 4, 5, 6, 7}, {1, 2, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6}, {1, 3, 4, 5, 6, 7}].


In [42]:
# Draw a clique size in [2, num_users] (inclusive) and print the cliques of
# that size.
# NOTE(review): unseeded random draw in a copied cell; the recorded output is
# not reproducible on a fresh run.
size_k = random.randint(2, num_users)
print(f"size_k: {size_k}")
print_cliques(G, size_k)

size_k: 3
3-cliques = 35, [{3, 5, 6}, {1, 5, 7}, {2, 5, 7}, {1, 2, 5}, {4, 5, 6}, {1, 3, 6}, {4, 6, 7}, {1, 4, 5}, {3, 4, 7}, {2, 4, 5}, {3, 6, 7}, {2, 3, 6}, {1, 5, 6}, {5, 6, 7}, {1, 2, 4}, {2, 5, 6}, {1, 2, 7}, {2, 6, 7}, {3, 4, 6}, {1, 4, 7}, {1, 3, 5}, {2, 4, 7}, {2, 3, 5}, {1, 6, 7}, {3, 5, 7}, {1, 2, 6}, {4, 5, 7}, {1, 2, 3}, {1, 4, 6}, {1, 3, 4}, {1, 3, 7}, {3, 4, 5}, {2, 3, 4}, {2, 3, 7}, {2, 4, 6}].


In [43]:
# Draw a clique size in [2, num_users] (inclusive) and print the cliques of
# that size.
# NOTE(review): unseeded random draw in a copied cell; the recorded output is
# not reproducible on a fresh run.
size_k = random.randint(2, num_users)
print(f"size_k: {size_k}")
print_cliques(G, size_k)

size_k: 5
5-cliques = 21, [{3, 4, 5, 6, 7}, {1, 3, 4, 5, 6}, {1, 2, 3, 4, 6}, {2, 3, 4, 5, 7}, {2, 3, 5, 6, 7}, {1, 3, 4, 6, 7}, {1, 2, 3, 5, 7}, {1, 2, 4, 5, 7}, {1, 2, 3, 4, 5}, {2, 3, 4, 5, 6}, {2, 4, 5, 6, 7}, {1, 2, 5, 6, 7}, {2, 3, 4, 6, 7}, {1, 2, 3, 5, 6}, {1, 2, 4, 5, 6}, {1, 3, 4, 5, 7}, {1, 4, 5, 6, 7}, {1, 2, 3, 4, 7}, {1, 3, 5, 6, 7}, {1, 2, 4, 6, 7}, {1, 2, 3, 6, 7}].


In [38]:
# Draw a clique size in [2, num_users] (inclusive) and print the cliques of
# that size.
# NOTE(review): the execution count of this cell is lower than that of the
# cells above it, so the notebook was run out of order; the output shown will
# not be reproduced by Restart & Run All.
size_k = random.randint(2, num_users)
print(f"size_k: {size_k}")
print_cliques(G, size_k)

size_k: 4
4-cliques = 35, [{1, 3, 6, 7}, {2, 3, 4, 6}, {1, 2, 3, 4}, {1, 4, 6, 7}, {1, 2, 3, 7}, {1, 2, 4, 6}, {2, 4, 5, 7}, {1, 3, 4, 6}, {3, 5, 6, 7}, {3, 4, 5, 6}, {2, 3, 5, 7}, {1, 2, 5, 7}, {2, 5, 6, 7}, {1, 3, 5, 7}, {2, 3, 4, 5}, {1, 2, 3, 6}, {1, 2, 4, 5}, {1, 4, 5, 7}, {1, 3, 4, 5}, {2, 4, 5, 6}, {3, 4, 6, 7}, {2, 3, 5, 6}, {2, 3, 6, 7}, {1, 2, 5, 6}, {1, 5, 6, 7}, {2, 4, 6, 7}, {1, 3, 5, 6}, {2, 3, 4, 7}, {1, 4, 5, 6}, {1, 2, 4, 7}, {1, 2, 3, 5}, {1, 3, 4, 7}, {3, 4, 5, 7}, {4, 5, 6, 7}, {1, 2, 6, 7}].


In [58]:
# Draw a clique size in [2, num_users] (inclusive) and print the cliques of
# that size.
# NOTE(review): the large jump in execution count suggests this cell was
# re-run many times until a particular size came up; an explicit size or a
# seeded generator would make the result reproducible.
size_k = random.randint(2, num_users)
print(f"size_k: {size_k}")
print_cliques(G, size_k)

size_k: 2
2-cliques = 21, [{1, 2}, {1, 3}, {1, 4}, {1, 5}, {1, 6}, {1, 7}, {2, 3}, {2, 4}, {2, 5}, {2, 6}, {2, 7}, {3, 4}, {3, 5}, {3, 6}, {3, 7}, {4, 5}, {4, 6}, {4, 7}, {5, 6}, {5, 7}, {6, 7}].
