In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def compute_adjacency_matrix(df):
    # Xác định số lượng người dùng và số lượng phim duy nhất từ dữ liệu
    unique_users = df.user_id.unique()
    unique_movies = df.movie_id.unique()
    num_users = len(unique_users)
    num_movies = len(unique_movies)

    # Tạo ma trận kề ban đầu với kích thước tương ứng với số lượng người dùng và phim
    adjacency_matrix = np.zeros((num_users, num_movies))

    # Đánh dấu 1 cho các liên kết giữa người dùng và phim trong dữ liệu Netflix
    for user, movie in df[['user_id', 'movie_id']].values:
        user_index = np.where(unique_users == int(user))[0][0]
        movie_index = np.where(unique_movies == int(movie))[0][0]
        adjacency_matrix[user_index, movie_index] = 1

    return np.array(adjacency_matrix)

In [3]:
def compute_user_weight_matrix(df):
    # Xác định số lượng người dùng và số lượng phim duy nhất từ dữ liệu
    unique_users = df.user_id.unique()
    unique_movies = df.movie_id.unique()
    num_users = len(unique_users)
    num_movies = len(unique_movies)

    # Tạo ma trận trọng số giữa người dùng với các người dùng khác
    weight_matrix = np.zeros((num_users, num_users))
    source_target = []

    # Tính toán trọng số giữa người dùng với các người dùng khác
    for i in range(num_users):
        for j in range(i+1, num_users):
            user1 = unique_users[i]
            user2 = unique_users[j]
            
            # Tính số lượng phim chung đã xem
            common_movies = set()
            common_movies_user_1 = set()
            common_movies_user_2 = set()
            for user, movie in df[['user_id', 'movie_id']].values:
                if user == user1:
                    common_movies_user_1.add(int(movie))
                elif user == user2:
                    common_movies_user_2.add(int(movie))
                common_movies = common_movies_user_1.intersection(common_movies_user_2)

            # Gán trọng số tương tự nhau dựa trên số lượng phim chung đã xem
            weight_matrix[i, j] = len(common_movies)
            weight_matrix[j, i] = len(common_movies)
            source_target.append([user1, user2, weight_matrix[i][j]])

    return weight_matrix, np.array(source_target)

In [4]:
def compute_sum_tot(source_target, source, target, update_source_target = None):
    sum_source = 0
    sum_target = 0
    value_source_target = 0
    if update_source_target == None: 
        for item in source_target:
            if item[0] == source:
                sum_source = sum_source + item[2]
            elif item[0] == target:
                sum_target = sum_target + item[2]
            if item[0] == source and item[1] == target:
                value_source_target = value_source_target + item[2]
    else:
        source= np.array(source)
        target= np.array(target)
        for key, item in update_source_target.items():
            if item[0] in source:
                sum_source = sum_source + item[2]
            elif item[0] in target:
                sum_target = sum_target + item[2]
            if item[0] in source and item[1] in target:
                value_source_target = value_source_target + item[2]
    return sum_source + sum_target - value_source_target

In [5]:
def compute_k_i(source_target, source, target, update_source_target = None):
    k_i = 0

    if update_source_target == None:
        for item in source_target:
            if item[0] == source and item[1] != target:
                k_i = k_i + 1
            elif item[0] == target:
                k_i = k_i + 1
            elif item[1] == target and item[0] != source:
                k_i = k_i + 1
            elif item[1] == source:
                k_i = k_i + 1
    else:
        source= np.array(source)
        target= np.array(target)
        for key, item in update_source_target.items():
            print(f'user1: {item[0]} --------- user2: {item[1]}')
            if ((len(item[1]) > 1 or len(item[0]) > 1) and (item[0] in source or item[1] in source or item[0] in target or item[1] in target)) :
                if (item[0] in source and item[1] in target):
                    continue
                else: 
                    if len(item[0]) > 1:
                        k_i = k_i + len(item[0]) - 1  
                    if len(item[1]) > 1:
                        k_i = k_i + len(item[1]) - 1   
                    # print(k_i)
            if item[0] in source and item[1] not in target:
                k_i  = k_i  + 1 ## 4
                # print(k_i)
            elif item[0] in target:
                k_i = k_i + 1 ## 4
                # print(k_i)
            elif item[1] in target and item[0] not in source:
                k_i = k_i + 1
                # print(k_i)
            elif item[1] in source:
                k_i = k_i + 1
                # print(k_i)
    return k_i

In [6]:
def sum_in_value(source_target, target):
    sum_in_value = 0
    if len(target) > 1: 
        for i in range(len(target)):
            for j in range(i + 1, len(target)):
                sum_in_value = sum_in_value + source_target[np.where((source_target[:, 0] == target[i]) & (source_target[:, 1] == target[j]))[0][0]][2]
    return sum_in_value

In [7]:
def k_in_value(group):
    k_in_value = 0
    if len(group) > 1: 
            for i in range(len(group)):
                for j in range(i + 1, len(group)):
                    print(f"{group[i]}-------{group[j]}")
                    k_in_value = k_in_value + 1
    return k_in_value

In [8]:
def compute_k_i_in(source_target, source, group):
    if len(group) <= 1:
        k_i_in = 1
    else: 
        k_i_in = 0
        source = np.array(source)
        group = np.array(group)
        for item in source_target:
            # print(f"user1: {item[0]} --------- user2: {item[1]}")
            if (item[0] in source and item[1] in group) :
                k_i_in = k_i_in + 1
                # print(k_i_in)
    return k_i_in + k_in_value(group)

In [9]:
def compute_2_m(source_target, weight_matrix, update_source_target = None):
    if update_source_target == None: 
        return np.sum(weight_matrix)
    else:
        _2m = 0
        for key, item in update_source_target.items():
            _2m = _2m + item[2]
        return 2 * _2m

In [10]:
def check_in_append(value_node, array_node):
    count = 0
    for i in value_node:
        for j in range(len(array_node)):
            if i in array_node[j]:
                count = count + 1
                break
    if count == 0:
        array_node.append(value_node)

In [11]:
def check_in(value_node, usr):
    count = 0
    for i in value_node:
        if i in usr:
            count = count + 1
            break
    if count == 0:
        return 0 #chua co
    else:
        return 1 #co

In [12]:
def the_number_of_node_in_steps(df, partition, choose_node = None):
    unique_users = df.user_id.unique()
    if len(partition) == 0:
            # unique_users = df.user_id.unique()
            usr = []
            for i in unique_users:
                usr.append([i])
            partition[0] = usr
    else:
            if len(choose_node) != 0:
                array_node = []
                for key, value in choose_node.items():
                    if len(array_node) == 0:
                        array_node.append(value[0])
                    else:
                        check_in_append(value[0], array_node)
                print(array_node)
            usr = []
            if len(partition) == 1:
                for i in range(len(array_node)):
                    for j in range(len(array_node[i])):
                        usr.append(array_node[i][j])
                usr1 = [usr]
                for i in unique_users:
                    if check_in([i], usr) == 0:
                        usr1.append([i])
                partition[len(partition)] = usr1
            else:
                for i in range(len(array_node)):
                    for j in range(len(array_node[i])):
                        for k in range(len(array_node[i][j])):
                            usr.append(array_node[i][j][k])
                usr1 = [usr]
                for i in unique_users:
                    # print(i)
                    if check_in([i], usr) == 0:
                        usr1.append([i])
                partition[len(partition)] = usr1

In [13]:
def update_weight_source_target(partition, source_target):
    # Xác định số lượng người dùng và số lượng phim duy nhất từ dữ liệu
    last_key = list(partition)[-1]
    unique_users = partition[last_key]
    unique_users.sort()
    num_users = len(unique_users)


    # Tạo ma trận trọng số giữa người dùng với các người dùng khác
    update_source_target = {}
         
    # Tính toán trọng số giữa người dùng với các người dùng khác
    count = 0
    for i in range(num_users):
      for j in range(i+1, num_users):
            user1 = unique_users[i]
            user2 = unique_users[j]

            # print(f'user1: {user1} ----------------- user2: {user2}')
            weight_value = 0
            for item1 in user1:
                  for item2 in user2:
                        weight_value = weight_value + source_target[np.where((source_target[:, 0] == item1) & (source_target[:, 1] == item2))[0][0]][2]
            update_source_target[count] = [user1, user2, weight_value]
            count = count + 1

    return update_source_target

In [14]:
df = pd.read_csv("C:/Users/UYEN/OneDrive - Trường ĐH CNTT - University of Information Technology/Đại Học/Năm 4/Làm nhóm HK2 2022-2023/Mạng xã hội/FinalProject/RatingCSV.csv")
df.head(5)

Unnamed: 0,user_id,movie_id,rating
0,1,101,5
1,1,102,3
2,1,103,3
3,1,104,3
4,1,105,2


In [15]:
adjacency_matrix = compute_adjacency_matrix(df)
adjacency_matrix

array([[1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1.],
       [1., 0., 1., 1., 0., 1.],
       [0., 0., 1., 1., 0., 1.],
       [1., 1., 1., 0., 1., 1.],
       [1., 0., 1., 1., 1., 1.],
       [0., 0., 1., 1., 1., 0.]])

In [16]:
weight_matrix, source_target = compute_user_weight_matrix(df)

In [17]:
print(weight_matrix)

[[0. 6. 4. 3. 5. 5. 3.]
 [6. 0. 4. 3. 5. 5. 3.]
 [4. 4. 0. 3. 3. 4. 2.]
 [3. 3. 3. 0. 2. 3. 2.]
 [5. 5. 3. 2. 0. 4. 2.]
 [5. 5. 4. 3. 4. 0. 3.]
 [3. 3. 2. 2. 2. 3. 0.]]


In [18]:
print(source_target)

[[1. 2. 6.]
 [1. 3. 4.]
 [1. 4. 3.]
 [1. 5. 5.]
 [1. 6. 5.]
 [1. 7. 3.]
 [2. 3. 4.]
 [2. 4. 3.]
 [2. 5. 5.]
 [2. 6. 5.]
 [2. 7. 3.]
 [3. 4. 3.]
 [3. 5. 3.]
 [3. 6. 4.]
 [3. 7. 2.]
 [4. 5. 2.]
 [4. 6. 3.]
 [4. 7. 2.]
 [5. 6. 4.]
 [5. 7. 2.]
 [6. 7. 3.]]


In [19]:
source_target[np.where((source_target[:, 0] == 5) & (source_target[:, 1] == 6))[0][0]][2]

4.0

In [20]:
unique_users = df.user_id.unique()
unique_movies = df.movie_id.unique()
num_users = len(unique_users)
num_movies = len(unique_movies)

In [21]:
partition = {}
the_number_of_node_in_steps(df, partition)
partition

{0: [[1], [2], [3], [4], [5], [6], [7]]}

In [22]:
num_nodes = adjacency_matrix.shape[0]
num_nodes

7

In [23]:
# num_nodes = adjacency_matrix.shape[0]
# m = np.sum(adjacency_matrix) / 2
# sum_tot = np.sum(adjacency_matrix, axis=1)
# modularity = 0.0
# print(num_nodes)
# print(m)
# print(sum_tot)
# print(modularity)

# Lần 1

In [24]:
#########Dang chay cai nay
dict_partition = {}
for i in range(len(source_target)):
    # user_group, sum_in
    user_group = [source_target[i][0], source_target[i][1]]
    sum_in = source_target[i][2] + sum_in_value(source_target, [source_target[i][1]])
    sum_tot = compute_sum_tot(source_target, source_target[i][0], source_target[i][1])
    k_i_in = compute_k_i_in(source_target, [source_target[i][0]], [source_target[i][1]]) 
    k_i = compute_k_i(source_target, source_target[i][0], source_target[i][1])
    _2m = compute_2_m(source_target, weight_matrix)
    Q = ((sum_in + k_i_in)/(_2m) - ((sum_tot + k_i)/_2m)**2 - (sum_in/_2m - ((sum_tot/_2m)**2) - ((k_i)/_2m)**2))
    #group - sum_in - sum_tot - k_i_in - k_i - 2m - Q
    dict_partition[i] = [user_group, sum_in, sum_tot, k_i_in, k_i, _2m, Q]
    

In [25]:
dict_partition

{0: [[1.0, 2.0], 6.0, 40.0, 1, 10, 148.0, -0.029766252739225695],
 1: [[1.0, 3.0], 4.0, 34.0, 1, 10, 148.0, -0.02428780131482835],
 2: [[1.0, 4.0], 3.0, 30.0, 1, 10, 148.0, -0.020635500365230097],
 3: [[1.0, 5.0], 5.0, 27.0, 1, 10, 148.0, -0.01789627465303141],
 4: [[1.0, 6.0], 5.0, 24.0, 1, 10, 148.0, -0.015157048940832727],
 5: [[1.0, 7.0], 3.0, 23.0, 1, 10, 148.0, -0.014243973703433166],
 6: [[2.0, 3.0], 4.0, 28.0, 1, 10, 148.0, -0.01880934989043096],
 7: [[2.0, 4.0], 3.0, 24.0, 1, 10, 148.0, -0.015157048940832727],
 8: [[2.0, 5.0], 5.0, 21.0, 1, 10, 148.0, -0.012417823228634037],
 9: [[2.0, 6.0], 5.0, 18.0, 1, 10, 148.0, -0.009678597516435353],
 10: [[2.0, 7.0], 3.0, 17.0, 1, 10, 148.0, -0.008765522279035789],
 11: [[3.0, 4.0], 3.0, 16.0, 1, 10, 148.0, -0.007852447041636231],
 12: [[3.0, 5.0], 3.0, 15.0, 1, 10, 148.0, -0.006939371804236665],
 13: [[3.0, 6.0], 4.0, 11.0, 1, 10, 148.0, -0.0032870708546384186],
 14: [[3.0, 7.0], 2.0, 10.0, 1, 10, 148.0, -0.002373995617238861],
 15: [[

In [26]:
sorted_data = sorted(dict_partition.items(), key=lambda x: x[-1], reverse=True)
choose_node = {}
for key, value in sorted_data:
    if value[-1] > 0:
        choose_node[key] = value
choose_node

{20: [[6.0, 7.0], 3.0, 0.0, 1, 10, 148.0, 0.006756756756756757],
 19: [[5.0, 7.0], 2.0, 4.0, 1, 10, 148.0, 0.003104455807158511],
 18: [[5.0, 6.0], 4.0, 5.0, 1, 10, 148.0, 0.002191380569758948],
 17: [[4.0, 7.0], 2.0, 5.0, 1, 10, 148.0, 0.002191380569758948],
 16: [[4.0, 6.0], 3.0, 7.0, 1, 10, 148.0, 0.00036523009495982237]}

In [27]:
the_number_of_node_in_steps(df, partition, choose_node)
partition

[[6.0, 7.0]]


{0: [[1], [2], [3], [4], [5], [6], [7]],
 1: [[6.0, 7.0], [1], [2], [3], [4], [5]]}

# Lần 2

In [28]:
update_source_target = update_weight_source_target(partition, source_target)
update_source_target

{0: [[1], [2], 6.0],
 1: [[1], [3], 4.0],
 2: [[1], [4], 3.0],
 3: [[1], [5], 5.0],
 4: [[1], [6.0, 7.0], 8.0],
 5: [[2], [3], 4.0],
 6: [[2], [4], 3.0],
 7: [[2], [5], 5.0],
 8: [[2], [6.0, 7.0], 8.0],
 9: [[3], [4], 3.0],
 10: [[3], [5], 3.0],
 11: [[3], [6.0, 7.0], 6.0],
 12: [[4], [5], 2.0],
 13: [[4], [6.0, 7.0], 5.0],
 14: [[5], [6.0, 7.0], 6.0]}

In [29]:
#########Dang chay cai nay
dict_partition = {}
for i in range(len(update_source_target)):
    # user_group, sum_in
    # print(f"user1: {update_source_target[i][0]} ------ user2: {update_source_target[i][1]}")
    # print(f"weight: {update_source_target[i][2]}")
    user_group = [update_source_target[i][0], update_source_target[i][1]]
    sum_in = update_source_target[i][2] + sum_in_value(source_target, update_source_target[i][1])
    # print(f"user group: {user_group} --------- weight: {sum_in}")
    sum_tot = compute_sum_tot(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    k_i_in = compute_k_i_in(source_target, update_source_target[i][0], update_source_target[i][1])
    k_i = compute_k_i(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    _2m = compute_2_m(source_target, weight_matrix, update_source_target)
    Q = ((sum_in + k_i_in)/(_2m) - ((sum_tot + k_i)/_2m)**2 - (sum_in/_2m - ((sum_tot/_2m)**2) - ((k_i)/_2m)**2))
    # print(f"sum_in: {sum_in}-------sum_tot: {sum_tot}---------k_i_in: {k_i_in}-----k_i:{k_i}-----2m:{_2m}---Q: {Q}")
    # #group - sum_in - sum_tot - k_i_in - k_i - 2m - Q
    dict_partition[i] = [user_group, sum_in, sum_tot, k_i_in, k_i, _2m, Q]
    

user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4]
user1: [1] --------- user2: [5]
user1: [1] --------- user2: [6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4]
user1: [2] --------- user2: [5]
user1: [2] --------- user2: [6.0, 7.0]
user1: [3] --------- user2: [4]
user1: [3] --------- user2: [5]
user1: [3] --------- user2: [6.0, 7.0]
user1: [4] --------- user2: [5]
user1: [4] --------- user2: [6.0, 7.0]
user1: [5] --------- user2: [6.0, 7.0]
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4]
user1: [1] --------- user2: [5]
user1: [1] --------- user2: [6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4]
user1: [2] --------- user2: [5]
user1: [2] --------- user2: [6.0, 7.0]
user1: [3] --------- user2: [4]
user1: [3] --------- user2: [5]
user1: [3] --------- user2: [6.0, 7.0]
user1: [4] --------- user2: [5]
user1: [4] --------- user2: [6.0, 7.0]
user1: [5

In [30]:
dict_partition

{0: [[[1], [2]], 6.0, 40.0, 1, 10, 142.0, -0.03263241420353103],
 1: [[[1], [3]], 4.0, 34.0, 1, 10, 142.0, -0.02668121404483237],
 2: [[[1], [4]], 3.0, 30.0, 1, 10, 142.0, -0.022713747272366597],
 3: [[[1], [5]], 5.0, 27.0, 1, 10, 142.0, -0.019738147193017256],
 4: [[[1], [6.0, 7.0]], 11.0, 18.0, 3, 12, 142.0, -0.00029756000793493276],
 5: [[[2], [3]], 4.0, 28.0, 1, 10, 142.0, -0.020730013886133693],
 6: [[[2], [4]], 3.0, 24.0, 1, 10, 142.0, -0.01676254711366792],
 7: [[[2], [5]], 5.0, 21.0, 1, 10, 142.0, -0.013786947034318587],
 8: [[[2], [6.0, 7.0]], 11.0, 12.0, 3, 12, 142.0, 0.006843880182503467],
 9: [[[3], [4]], 3.0, 16.0, 1, 10, 142.0, -0.008827613568736365],
 10: [[[3], [5]], 3.0, 15.0, 1, 10, 142.0, -0.007835746875619911],
 11: [[[3], [6.0, 7.0]], 9.0, 6.0, 3, 12, 142.0, 0.013985320372941881],
 12: [[[4], [5]], 2.0, 11.0, 1, 10, 142.0, -0.0038682801031541354],
 13: [[[4], [6.0, 7.0]], 8.0, 2.0, 3, 12, 142.0, 0.018746280499900812],
 14: [[[5], [6.0, 7.0]], 9.0, 0.0, 3, 12, 142.0

In [31]:
sorted_data = sorted(dict_partition.items(), key=lambda x: x[-1], reverse=True)
choose_node = {}
for key, value in sorted_data:
    if value[-1] > 0:
        choose_node[key] = value
choose_node

{14: [[[5], [6.0, 7.0]], 9.0, 0.0, 3, 12, 142.0, 0.021126760563380288],
 13: [[[4], [6.0, 7.0]], 8.0, 2.0, 3, 12, 142.0, 0.018746280499900812],
 11: [[[3], [6.0, 7.0]], 9.0, 6.0, 3, 12, 142.0, 0.013985320372941881],
 8: [[[2], [6.0, 7.0]], 11.0, 12.0, 3, 12, 142.0, 0.006843880182503467]}

In [32]:
the_number_of_node_in_steps(df, partition, choose_node)
partition

[[[5], [6.0, 7.0]]]


{0: [[1], [2], [3], [4], [5], [6], [7]],
 1: [[1], [2], [3], [4], [5], [6.0, 7.0]],
 2: [[5, 6.0, 7.0], [1], [2], [3], [4]]}

# Lần 3

In [33]:
update_source_target = update_weight_source_target(partition, source_target)
update_source_target

{0: [[1], [2], 6.0],
 1: [[1], [3], 4.0],
 2: [[1], [4], 3.0],
 3: [[1], [5, 6.0, 7.0], 13.0],
 4: [[2], [3], 4.0],
 5: [[2], [4], 3.0],
 6: [[2], [5, 6.0, 7.0], 13.0],
 7: [[3], [4], 3.0],
 8: [[3], [5, 6.0, 7.0], 9.0],
 9: [[4], [5, 6.0, 7.0], 7.0]}

In [34]:
def compute_sum_tot(source_target, source, target, update_source_target = None):
    sum_source = 0
    sum_target = 0
    value_source_target = 0
    if update_source_target == None: 
        for item in source_target:
            if item[0] == source:
                sum_source = sum_source + item[2]
            elif item[0] == target:
                sum_target = sum_target + item[2]
            if item[0] == source and item[1] == target:
                value_source_target = value_source_target + item[2]
    else:
        source= np.array(source)
        target= np.array(target)
        for key, item in update_source_target.items():
            if item[0] in source:
                sum_source = sum_source + item[2]
            elif item[0] in target:
                sum_target = sum_target + item[2]
            if item[0] in source and item[1] in target:
                value_source_target = value_source_target + item[2]
    return sum_source + sum_target - value_source_target

In [35]:
sum_source = 0
sum_target = 0

In [36]:
source= np.array([1])
target= np.array([2])

In [37]:
#########Dang chay cai nay
dict_partition = {}
for i in range(len(update_source_target)):
    # # user_group, sum_in
    # print(f"user1: {update_source_target[i][0]} ------ user2: {update_source_target[i][1]}")
    # print(f"weight: {update_source_target[i][2]}")
    user_group = [update_source_target[i][0], update_source_target[i][1]]
    sum_in = update_source_target[i][2] + sum_in_value(source_target, update_source_target[i][1])
    # print(f"user group: {user_group} --------- weight: {sum_in}")
    sum_tot = compute_sum_tot(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    k_i_in = compute_k_i_in(source_target, update_source_target[i][0], update_source_target[i][1])
    k_i = compute_k_i(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    _2m = compute_2_m(source_target, weight_matrix, update_source_target)
    Q = ((sum_in + k_i_in)/(_2m) - ((sum_tot + k_i)/_2m)**2 - (sum_in/_2m - ((sum_tot/_2m)**2) - ((k_i)/_2m)**2))
    # print(f"sum_in: {sum_in}-------sum_tot: {sum_tot}---------k_i_in: {k_i_in}-----k_i:{k_i}-----2m:{_2m}---Q: {Q}")
    # #group - sum_in - sum_tot - k_i_in - k_i - 2m - Q
    dict_partition[i] = [user_group, sum_in, sum_tot, k_i_in, k_i, _2m, Q]
    

user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4]
user1: [1] --------- user2: [5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4]
user1: [2] --------- user2: [5, 6.0, 7.0]
user1: [3] --------- user2: [4]
user1: [3] --------- user2: [5, 6.0, 7.0]
user1: [4] --------- user2: [5, 6.0, 7.0]
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4]
user1: [1] --------- user2: [5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4]
user1: [2] --------- user2: [5, 6.0, 7.0]
user1: [3] --------- user2: [4]
user1: [3] --------- user2: [5, 6.0, 7.0]
user1: [4] --------- user2: [5, 6.0, 7.0]
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4]
user1: [1] --------- user2: [5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4]
user1: [2] --------- user2: [5, 6.0, 7.0]
user1: [3] --------- user2: [4]
user

In [38]:
dict_partition

{0: [[[1], [2]], 6.0, 40.0, 1, 10, 130.0, -0.039644970414201175],
 1: [[[1], [3]], 4.0, 34.0, 1, 10, 130.0, -0.032544378698224866],
 2: [[[1], [4]], 3.0, 30.0, 1, 10, 130.0, -0.027810650887573965],
 3: [[[1], [5, 6.0, 7.0]], 22.0, 13.0, 6, 12, 130.0, 0.02769230769230771],
 4: [[[2], [3]], 4.0, 28.0, 1, 10, 130.0, -0.02544378698224852],
 5: [[[2], [4]], 3.0, 24.0, 1, 10, 130.0, -0.020710059171597628],
 6: [[[2], [5, 6.0, 7.0]], 22.0, 7.0, 6, 12, 130.0, 0.03621301775147928],
 7: [[[3], [4]], 3.0, 16.0, 1, 10, 130.0, -0.011242603550295863],
 8: [[[3], [5, 6.0, 7.0]], 18.0, 3.0, 6, 12, 130.0, 0.04189349112426036],
 9: [[[4], [5, 6.0, 7.0]], 16.0, 0.0, 6, 12, 130.0, 0.046153846153846156]}

In [39]:
sorted_data = sorted(dict_partition.items(), key=lambda x: x[-1], reverse=True)
choose_node = {}
for key, value in sorted_data:
    if value[-1] > 0:
        choose_node[key] = value
choose_node

{9: [[[4], [5, 6.0, 7.0]], 16.0, 0.0, 6, 12, 130.0, 0.046153846153846156],
 8: [[[3], [5, 6.0, 7.0]], 18.0, 3.0, 6, 12, 130.0, 0.04189349112426036],
 6: [[[2], [5, 6.0, 7.0]], 22.0, 7.0, 6, 12, 130.0, 0.03621301775147928],
 3: [[[1], [5, 6.0, 7.0]], 22.0, 13.0, 6, 12, 130.0, 0.02769230769230771]}

In [40]:
the_number_of_node_in_steps(df, partition, choose_node)
partition

[[[4], [5, 6.0, 7.0]]]


{0: [[1], [2], [3], [4], [5], [6], [7]],
 1: [[1], [2], [3], [4], [5], [6.0, 7.0]],
 2: [[1], [2], [3], [4], [5, 6.0, 7.0]],
 3: [[4, 5, 6.0, 7.0], [1], [2], [3]]}

# Lần 4

In [41]:
update_source_target = update_weight_source_target(partition, source_target)
update_source_target

{0: [[1], [2], 6.0],
 1: [[1], [3], 4.0],
 2: [[1], [4, 5, 6.0, 7.0], 16.0],
 3: [[2], [3], 4.0],
 4: [[2], [4, 5, 6.0, 7.0], 16.0],
 5: [[3], [4, 5, 6.0, 7.0], 12.0]}

In [42]:
#########Dang chay cai nay
dict_partition = {}
for i in range(len(update_source_target)):
    # user_group, sum_in
    # print(f"user1: {update_source_target[i][0]} ------ user2: {update_source_target[i][1]}")
    # print(f"weight: {update_source_target[i][2]}")
    user_group = [update_source_target[i][0], update_source_target[i][1]]
    sum_in = update_source_target[i][2] + sum_in_value(source_target, update_source_target[i][1])
    # print(f"user group: {user_group} --------- weight: {sum_in}")
    sum_tot = compute_sum_tot(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    k_i_in = compute_k_i_in(source_target, update_source_target[i][0], update_source_target[i][1])
    k_i = compute_k_i(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    _2m = compute_2_m(source_target, weight_matrix, update_source_target)
    Q = ((sum_in + k_i_in)/(_2m) - ((sum_tot + k_i)/_2m)**2 - (sum_in/_2m - ((sum_tot/_2m)**2) - ((k_i)/_2m)**2))
    # print(f"sum_in: {sum_in}-------sum_tot: {sum_tot}---------k_i_in: {k_i_in}-----k_i:{k_i}-----2m:{_2m}---Q: {Q}")
    # #group - sum_in - sum_tot - k_i_in - k_i - 2m - Q
    dict_partition[i] = [user_group, sum_in, sum_tot, k_i_in, k_i, _2m, Q]
    

user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4, 5, 6.0, 7.0]
user1: [3] --------- user2: [4, 5, 6.0, 7.0]
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4, 5, 6.0, 7.0]
user1: [3] --------- user2: [4, 5, 6.0, 7.0]
4.0-------5.0
4.0-------6.0
4.0-------7.0
5.0-------6.0
5.0-------7.0
6.0-------7.0
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4, 5, 6.0, 7.0]
user1: [3] --------- user2: [4, 5, 6.0, 7.0]
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3]
user1: [1] --------- user2: [4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3]
user1: [2] --------- user2: [4, 5, 6.0, 7.0]
user1: [3] --------- user2: [4, 5, 6.

In [43]:
dict_partition

{0: [[[1], [2]], 6.0, 40.0, 1, 10, 116.0, -0.05083234244946486],
 1: [[[1], [3]], 4.0, 34.0, 1, 10, 116.0, -0.04191438763376931],
 2: [[[1], [4, 5, 6.0, 7.0]], 32.0, 10.0, 10, 10, 116.0, 0.07134363852556475],
 3: [[[2], [3]], 4.0, 28.0, 1, 10, 116.0, -0.0329964328180737],
 4: [[[2], [4, 5, 6.0, 7.0]], 32.0, 4.0, 10, 10, 116.0, 0.08026159334126037],
 5: [[[3], [4, 5, 6.0, 7.0]], 28.0, 0.0, 10, 10, 116.0, 0.08620689655172414]}

In [44]:
sorted_data = sorted(dict_partition.items(), key=lambda x: x[-1], reverse=True)
choose_node = {}
for key, value in sorted_data:
    if value[-1] > 0:
        choose_node[key] = value
choose_node

{5: [[[3], [4, 5, 6.0, 7.0]], 28.0, 0.0, 10, 10, 116.0, 0.08620689655172414],
 4: [[[2], [4, 5, 6.0, 7.0]], 32.0, 4.0, 10, 10, 116.0, 0.08026159334126037],
 2: [[[1], [4, 5, 6.0, 7.0]], 32.0, 10.0, 10, 10, 116.0, 0.07134363852556475]}

In [45]:
the_number_of_node_in_steps(df, partition, choose_node)
partition

[[[3], [4, 5, 6.0, 7.0]]]


{0: [[1], [2], [3], [4], [5], [6], [7]],
 1: [[1], [2], [3], [4], [5], [6.0, 7.0]],
 2: [[1], [2], [3], [4], [5, 6.0, 7.0]],
 3: [[1], [2], [3], [4, 5, 6.0, 7.0]],
 4: [[3, 4, 5, 6.0, 7.0], [1], [2]]}

# Lần 5

In [46]:
update_source_target = update_weight_source_target(partition, source_target)
update_source_target

{0: [[1], [2], 6.0],
 1: [[1], [3, 4, 5, 6.0, 7.0], 20.0],
 2: [[2], [3, 4, 5, 6.0, 7.0], 20.0]}

In [47]:
#########Dang chay cai nay
dict_partition = {}
for i in range(len(update_source_target)):
    # user_group, sum_in
    # print(f"user1: {update_source_target[i][0]} ------ user2: {update_source_target[i][1]}")
    # print(f"weight: {update_source_target[i][2]}")
    user_group = [update_source_target[i][0], update_source_target[i][1]]
    sum_in = update_source_target[i][2] + sum_in_value(source_target, update_source_target[i][1])
    # print(f"user group: {user_group} --------- weight: {sum_in}")
    sum_tot = compute_sum_tot(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    k_i_in = compute_k_i_in(source_target, update_source_target[i][0], update_source_target[i][1])
    k_i = compute_k_i(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    _2m = compute_2_m(source_target, weight_matrix, update_source_target)
    Q = ((sum_in + k_i_in)/(_2m) - ((sum_tot + k_i)/_2m)**2 - (sum_in/_2m - ((sum_tot/_2m)**2) - ((k_i)/_2m)**2))
    # print(f"sum_in: {sum_in}-------sum_tot: {sum_tot}---------k_i_in: {k_i_in}-----k_i:{k_i}-----2m:{_2m}---Q: {Q}")
    # #group - sum_in - sum_tot - k_i_in - k_i - 2m - Q
    dict_partition[i] = [user_group, sum_in, sum_tot, k_i_in, k_i, _2m, Q]
    

user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3, 4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3, 4, 5, 6.0, 7.0]
3.0-------4.0
3.0-------5.0
3.0-------6.0
3.0-------7.0
4.0-------5.0
4.0-------6.0
4.0-------7.0
5.0-------6.0
5.0-------7.0
6.0-------7.0
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3, 4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3, 4, 5, 6.0, 7.0]
3.0-------4.0
3.0-------5.0
3.0-------6.0
3.0-------7.0
4.0-------5.0
4.0-------6.0
4.0-------7.0
5.0-------6.0
5.0-------7.0
6.0-------7.0
user1: [1] --------- user2: [2]
user1: [1] --------- user2: [3, 4, 5, 6.0, 7.0]
user1: [2] --------- user2: [3, 4, 5, 6.0, 7.0]


In [48]:
dict_partition

{0: [[[1], [2]], 6.0, 40.0, 1, 10, 92.0, -0.08364839319470699],
 1: [[[1], [3, 4, 5, 6.0, 7.0]], 48.0, 6.0, 15, 6, 92.0, 0.1545368620037807],
 2: [[[2], [3, 4, 5, 6.0, 7.0]], 48.0, 0.0, 15, 6, 92.0, 0.16304347826086962]}

In [49]:
sorted_data = sorted(dict_partition.items(), key=lambda x: x[-1], reverse=True)
choose_node = {}
for key, value in sorted_data:
    if value[-1] > 0:
        choose_node[key] = value
choose_node

{2: [[[2], [3, 4, 5, 6.0, 7.0]], 48.0, 0.0, 15, 6, 92.0, 0.16304347826086962],
 1: [[[1], [3, 4, 5, 6.0, 7.0]], 48.0, 6.0, 15, 6, 92.0, 0.1545368620037807]}

In [50]:
the_number_of_node_in_steps(df, partition, choose_node)
partition

[[[2], [3, 4, 5, 6.0, 7.0]]]


{0: [[1], [2], [3], [4], [5], [6], [7]],
 1: [[1], [2], [3], [4], [5], [6.0, 7.0]],
 2: [[1], [2], [3], [4], [5, 6.0, 7.0]],
 3: [[1], [2], [3], [4, 5, 6.0, 7.0]],
 4: [[1], [2], [3, 4, 5, 6.0, 7.0]],
 5: [[2, 3, 4, 5, 6.0, 7.0], [1]]}

# Lần 6

In [51]:
update_source_target = update_weight_source_target(partition, source_target)
update_source_target

{0: [[1], [2, 3, 4, 5, 6.0, 7.0], 26.0]}

In [52]:
#########Dang chay cai nay
dict_partition = {}
for i in range(len(update_source_target)):
    # user_group, sum_in
    # print(f"user1: {update_source_target[i][0]} ------ user2: {update_source_target[i][1]}")
    # print(f"weight: {update_source_target[i][2]}")
    user_group = [update_source_target[i][0], update_source_target[i][1]]
    sum_in = update_source_target[i][2] + sum_in_value(source_target, update_source_target[i][1])
    # print(f"user group: {user_group} --------- weight: {sum_in}")
    sum_tot = compute_sum_tot(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    k_i_in = compute_k_i_in(source_target, update_source_target[i][0], update_source_target[i][1])
    k_i = compute_k_i(source_target, update_source_target[i][0], update_source_target[i][1], update_source_target)
    _2m = compute_2_m(source_target, weight_matrix, update_source_target)
    Q = ((sum_in + k_i_in)/(_2m) - ((sum_tot + k_i)/_2m)**2 - (sum_in/_2m - ((sum_tot/_2m)**2) - ((k_i)/_2m)**2))
    # print(f"sum_in: {sum_in}-------sum_tot: {sum_tot}---------k_i_in: {k_i_in}-----k_i:{k_i}-----2m:{_2m}---Q: {Q}")
    # #group - sum_in - sum_tot - k_i_in - k_i - 2m - Q
    dict_partition[i] = [user_group, sum_in, sum_tot, k_i_in, k_i, _2m, Q]
    

2.0-------3.0
2.0-------4.0
2.0-------5.0
2.0-------6.0
2.0-------7.0
3.0-------4.0
3.0-------5.0
3.0-------6.0
3.0-------7.0
4.0-------5.0
4.0-------6.0
4.0-------7.0
5.0-------6.0
5.0-------7.0
6.0-------7.0
user1: [1] --------- user2: [2, 3, 4, 5, 6.0, 7.0]


In [53]:
dict_partition

{0: [[[1], [2, 3, 4, 5, 6.0, 7.0]],
  74.0,
  0.0,
  21,
  0,
  52.0,
  0.40384615384615374]}

In [54]:
sorted_data = sorted(dict_partition.items(), key=lambda x: x[-1], reverse=True)
choose_node = {}
for key, value in sorted_data:
    if value[-1] > 0:
        choose_node[key] = value
choose_node

{0: [[[1], [2, 3, 4, 5, 6.0, 7.0]],
  74.0,
  0.0,
  21,
  0,
  52.0,
  0.40384615384615374]}

In [55]:
the_number_of_node_in_steps(df, partition, choose_node)
partition

[[[1], [2, 3, 4, 5, 6.0, 7.0]]]


{0: [[1], [2], [3], [4], [5], [6], [7]],
 1: [[1], [2], [3], [4], [5], [6.0, 7.0]],
 2: [[1], [2], [3], [4], [5, 6.0, 7.0]],
 3: [[1], [2], [3], [4, 5, 6.0, 7.0]],
 4: [[1], [2], [3, 4, 5, 6.0, 7.0]],
 5: [[1], [2, 3, 4, 5, 6.0, 7.0]],
 6: [[1, 2, 3, 4, 5, 6.0, 7.0]]}

# Lần 7

In [56]:
update_source_target = update_weight_source_target(partition, source_target)
update_source_target

{}

In [57]:
if len(update_source_target) == 0:
    print(0)
else:
    print(1)

0
