In [1]:
import scipy as sc
from scipy import stats
import numpy as np
import csv
import time
import networkx as nx 
import pandas as pd
from collections import defaultdict
import itertools
import json
import random
c2a = np.loadtxt('c2a_small_sub.txt')
a2q = np.loadtxt('a2q_small_sub.txt')
c2q = np.loadtxt('c2q_small_sub.txt')


In [92]:
#----------------------------------------------------------#
#                     Function Part                        #
#----------------------------------------------------------#
def create_graph(data):
    G = nx.Graph()
    edges = data[:,0:2].copy()
    G.add_edges_from(edges)
    return G
def construct_graph(G_c2a,G_a2q,G_c2q):
    node_c2a = G_c2a.nodes
    node_a2q = G_a2q.nodes
    node_c2q = G_c2q.nodes

    node = list(node_c2a&node_a2q&node_c2q)

    g_c2a = G_c2a.subgraph(node)
    g_a2q = G_a2q.subgraph(node)
    g_c2q = G_c2q.subgraph(node)
    return g_c2a,g_a2q,g_c2q

def rec_dd():
    return defaultdict(rec_dd)
def neighbors_in_each_temporal_network(dict_data):
    temp = dict_data.values()
    temp  =  [x for x in temp if x]
    neighbors_set = list(itertools.chain.from_iterable(temp))
    return neighbors_set

In [3]:
#----------------------------------------------------------#
#  Combining 3 Layers to Construct Completed Network       #
#----------------------------------------------------------#
Total_1500_nodes = np.concatenate((c2a, a2q), axis=0)
Total_1500_nodes = np.concatenate((Total_1500_nodes,c2q),axis=0)
print(c2a.shape,c2q.shape, c2q.shape,Total_1500_nodes.shape, )
total_network = create_graph(Total_1500_nodes)
#  examine if the combined network is right
c2a_graph = create_graph(c2a)
a2q_graph = create_graph(a2q)
c2q_graph = create_graph(c2q)
# to get common nodes for each layer
g_c2a, g_a2q, g_c2q = construct_graph(c2a_graph,a2q_graph,c2q_graph)

print('Combined Graph:''number of edges:',total_network.number_of_edges(),'/number of nodes:',total_network.number_of_nodes(),'/is connected or not?:',nx.is_connected(total_network))
print('Graph:c2a','number of edges:',g_c2a.number_of_edges(),'/number of nodes:',g_c2a.number_of_nodes(),'/is connected or not?:',nx.is_connected(g_c2a))
print('Graph:a2q','number of edges:',g_a2q.number_of_edges(),'/number of nodes:',g_a2q.number_of_nodes(),'/is connected or not?:',nx.is_connected(g_a2q))
print('Graph:c2q','number of edges:',g_c2q.number_of_edges(),'/number of nodes:',g_c2q.number_of_nodes(),'/is connected or not?:',nx.is_connected(g_c2q))

connected_graph = max(nx.connected_component_subgraphs(total_network), key=len)
print('Connected Graph:''number of edges:',connected_graph.number_of_edges(),'/number of nodes:',connected_graph.number_of_nodes(),'/is connected or not?:',nx.is_connected(connected_graph))



(18965, 3) (4326, 3) (4326, 3) (27683, 3)
Combined Graph:number of edges: 5208 /number of nodes: 758 /is connected or not?: True
Graph:c2a number of edges: 2557 /number of nodes: 657 /is connected or not?: False
Graph:a2q number of edges: 2716 /number of nodes: 657 /is connected or not?: False
Graph:c2q number of edges: 1355 /number of nodes: 657 /is connected or not?: False
Connected Graph:number of edges: 5208 /number of nodes: 758 /is connected or not?: True


In [6]:
#----------------------------------------------------------#
#                Choose One Centrality                     #
#----------------------------------------------------------#
# finally we chose closeness considering there r lots of 0 in betweenness 
# betweenness = nx.betweenness_centrality(connected_graph)
closeness = nx.closeness_centrality(connected_graph)
#----------------------------------------------------------#
#      Rank Centrality to Seperate 3 Groups                #
#----------------------------------------------------------#
# if we use percentile then there is no need to use 'sorted' part
# sorted_betweeness = sorted(betweenness.items(), key=operator.itemgetter(1))
# sorted_closeness = sorted(closeness.items(), key=operator.itemgetter(1))
# print(sorted_closeness)


In [7]:
#----------------------------------------------------------#
#                   Processing Timestamp                   #
#----------------------------------------------------------#
sorted_timestamp = sorted(Total_1500_nodes, key=lambda entry: entry[2])
timestamp_sorted_data = pd.DataFrame(sorted_timestamp)
# df[list("ABCD")] = df[list("ABCD")].astype(int)
# lower_quantile, lower_medium_quantile,upper_medium_quantile,upper_quantile = timestamp_sorted_data[2].quantile([.25, .50,.75,1])
# print(lower_quantile,lower_medium_quantile,upper_medium_quantile,upper_quantile)

# use index of sorted data to seperate the network into four temporal networks and create corresponding graphs
temp1 = timestamp_sorted_data.iloc[:6920, :]
temp2 = timestamp_sorted_data.iloc[6921:13840, :]
temp3 = timestamp_sorted_data.iloc[13841:20760, :]
temp4 = timestamp_sorted_data.iloc[20761:27683, :]

temp1_graph = create_graph(temp1.values)
temp2_graph = create_graph(temp2.values)
temp3_graph = create_graph(temp3.values)
temp4_graph = create_graph(temp4.values)
graph_list= [temp1_graph,temp2_graph,temp3_graph,temp4_graph]

temp1_nodes = temp1_graph.nodes()
temp2_nodes = temp2_graph.nodes()
temp3_nodes = temp3_graph.nodes()
temp4_nodes = temp4_graph.nodes()
graph_nodes_list = [temp1_nodes, temp2_nodes, temp3_nodes,temp4_nodes]

In [None]:
#----------------------------------------------------------#
#       How to Select Nodes from Different Group           #
#----------------------------------------------------------#
# we try to divide all nodes into three parts: 1.active users 2. normal users 3. inactive users
# nodes with lower closeness should be the inactive users group
# nodes with higher closeness should be the active users group
# clossness of normal users is between it of inactive users and active users

# find the dividing line of groups by the value of clossness
t1 = np.percentile(list(closeness.values()),33)
t2 = np.percentile(list(closeness.values()),67)
print(t1, t2)
#t1, t2: 0.33184435421062075 0.36587723537941036

# one question here: how many nodes should we choose for each group?
 
# following code trys to analyze the distribution of commone users in four temporal graph
# then I find every node is active node if we try to find common users in four temporal graphs
# which means all closeness is bigger than t2
# so I should use other metheds to select nodes

temp_graph_nodes = None
for graph in graph_list:
    if temp_graph is None:
        temp_graph_nodes = set(graph.nodes())
    else:
        temp_graph_nodes = set(graph.nodes()) & temp_graph_nodes
# print(temp_graph)
# set(temp1_graph.nodes()

# candidate_node_dict = defaultdict(list)
# for node in temp_graph:
#     temp_graph_closeness = nx.closeness_centrality(connected_graph, node)
#     if temp_graph_closeness <= t1:
#         label = 0 # is inactive user
#     elif temp_graph_closeness> t2:
#         label = 2 # active user
#     else:
#         label = 1 # normal user
#     candidate_node_dict[label].append(node)
# print(candidate_node_dict)


In [34]:
#finally I decide to create a criterion to find reasonable number of nodes in each group
candidate_node_dict = defaultdict(list)
all_active_users_set = []
all_inactive_users_set = []
all_normal_users_set = []

for node in connected_graph.nodes():
    connected_graph_closeness = nx.closeness_centrality(connected_graph, node)
    if connected_graph_closeness <= t1:
        all_inactive_users_set.append(node) # is inactive user
    elif connected_graph_closeness> t2:
        all_active_users_set.append(node) # active user
    else:
        all_normal_users_set.append(node)# normal user
print(len(all_active_users_set), len(all_inactive_users_set), len(all_normal_users_set))
#250 249 259

group_nodes_list = [all_active_users_set, all_inactive_users_set, all_normal_users_set]
graph_nodes_list = [temp1_nodes, temp2_nodes, temp3_nodes,temp4_nodes]

user_score = {}
for user_set in group_nodes_list:
    for user_index, user in zip(range(1, len(user_set)+1),user_set):
        score = 0
        for temp_node_set in graph_nodes_list:
                if user in temp_node_set:
                    score = score + 1
        user_score[user] = score
# print(user_score)

for index, user_set in zip(range(1,len(group_nodes_list)+1), group_nodes_list):
    score_for_each_group = dict((key, user_score[key]) for key in user_set)
    average_score = sum(score_for_each_group.values())/float(len(score_for_each_group))
    print('index:',index, 'average_score:',average_score)
#     print('sum:',sum(score_for_each_group), 'length:' , len(score_for_each_group))
# all_active_users_set: 1 average_score: 3.3654618473895583
# all_inactive_users_set: 2 average_score: 1.916
# all_normal_users_set: 3 average_score: 2.7606177606177607

# basic math here:
# 1 average_score* 2 average_score* 3 average_score almost equals 18
# so for acitve users I choose 18/3.37 = 5
# as for inactive users 18/1.9 = 10
# as for normal users 18/2.76 = 6
# so I randomly pick 5, 10, 6 users from active_user_set, inacitve_user_set normal_user_set.

249 250 259


In [36]:
#----------------------------------------------------------#
#       Selecting Nodes from Different Group               #
#----------------------------------------------------------#
selected_active_users = random.sample(all_active_users_set, 5)
selected_inactive_users = random.sample(all_inactive_users_set, 10)
selected_normal_users = random.sample(all_normal_users_set, 6)
# print(selected_active_users, selected_inactive_users, selected_normal_users)

[74601.0, 91607.0, 42769.0, 82686.0, 19100.0] [87942.0, 92957.0, 111833.0, 15562.0, 149825.0, 58997.0, 47878.0, 15816.0, 58549.0, 4771.0] [12881.0, 425.0, 79853.0, 94162.0, 894.0, 10693.0]


In [41]:
#----------------------------------------------------------------#
# Constructing Subgraph for Selected nodes and their Neighbors   #
#----------------------------------------------------------------#

#----------------------------------------------------------#
#               For 1 HOP Neighbors                        #
#----------------------------------------------------------#

selected_userset_list = [selected_active_users, selected_inactive_users, selected_normal_users]
neighbor_dict = rec_dd()
for network_index, temporal_network in zip(range(1, len(graph_list)+1), graph_list):
    for user_set_index, user_set in zip(range(1, len(selected_userset_list)+1),selected_userset_list):
        for i, node in zip(range(1, len(user_set)+1), user_set):
            try:
                neighbor_list = list(nx.all_neighbors(temporal_network, node))
            except Exception as e:
#                 print(e)
                neighbor_list = []
            neighbor_dict[network_index][user_set_index][i] = neighbor_list

# print(json.dumps(neighbor_dict, indent = 4))     

# three groups, four temporal networks, so 12 set.

active_users_neighbors_in_temp1 = neighbors_in_each_temporal_network(neighbor_dict[1][1])
inactive_users_neighbors_in_temp1 = neighbors_in_each_temporal_network(neighbor_dict[1][2])
normal_users_neighbors_in_temp1 = neighbors_in_each_temporal_network(neighbor_dict[1][3])

active_users_neighbors_in_temp2 = neighbors_in_each_temporal_network(neighbor_dict[2][1])
inactive_users_neighbors_in_temp2 = neighbors_in_each_temporal_network(neighbor_dict[2][2])
normal_users_neighbors_in_temp2 = neighbors_in_each_temporal_network(neighbor_dict[2][3])

active_users_neighbors_in_temp3 = neighbors_in_each_temporal_network(neighbor_dict[3][1])
inactive_users_neighbors_in_temp3 = neighbors_in_each_temporal_network(neighbor_dict[3][2])
normal_users_neighbors_in_temp3 = neighbors_in_each_temporal_network(neighbor_dict[3][3])

active_users_neighbors_in_temp4 = neighbors_in_each_temporal_network(neighbor_dict[4][1])
inactive_users_neighbors_in_temp4 = neighbors_in_each_temporal_network(neighbor_dict[4][2])
normal_users_neighbors_in_temp4 = neighbors_in_each_temporal_network(neighbor_dict[4][3])
# print(len(active_users_neighbors_in_temp1),len(inactive_users_neighbors_in_temp3),len(normal_users_neighbors_in_temp4))


In [101]:
#----------------------------------------------------------#
#             1 HOP Neighbors SubGraph                     #
#----------------------------------------------------------#
graph_list= [temp1_graph,temp2_graph,temp3_graph,temp4_graph]

active_in_temp1_graph =temp1_graph.subgraph(active_users_neighbors_in_temp1)
inactive_in_temp1_graph =temp1_graph.subgraph(inactive_users_neighbors_in_temp1)
normal_in_temp1_graph =temp1_graph.subgraph(normal_users_neighbors_in_temp1)

active_in_temp2_graph =temp2_graph.subgraph(active_users_neighbors_in_temp2)
inactive_in_temp2_graph =temp2_graph.subgraph(inactive_users_neighbors_in_temp2)
normal_in_temp2_graph =temp2_graph.subgraph(normal_users_neighbors_in_temp2)

active_in_temp3_graph =temp3_graph.subgraph(active_users_neighbors_in_temp3)
inactive_in_temp3_graph =temp3_graph.subgraph(inactive_users_neighbors_in_temp3)
normal_in_temp3_graph =temp3_graph.subgraph(normal_users_neighbors_in_temp3)

active_in_temp4_graph =temp4_graph.subgraph(active_users_neighbors_in_temp4)
inactive_in_temp4_graph =temp4_graph.subgraph(inactive_users_neighbors_in_temp4)
normal_in_temp4_graph =temp4_graph.subgraph(normal_users_neighbors_in_temp4)

# random examination 
print('acitve_users_graph:''number of edges:',active_in_temp1_graph.number_of_edges(),'/number of nodes:',active_in_temp1_graph.number_of_nodes(),'/is connected or not?:',nx.is_connected(active_in_temp1_graph))
print('normal_users_graph:''number of edges:',normal_in_temp3_graph.number_of_edges(),'/number of nodes:',normal_in_temp3_graph.number_of_nodes(),'/is connected or not?:',nx.is_connected(normal_in_temp3_graph))
print('inactive_users_graph:''number of edges:',inactive_in_temp4_graph.number_of_edges(),'/number of nodes:',inactive_in_temp4_graph.number_of_nodes(),'/is connected or not?:',nx.is_connected(inactive_in_temp4_graph))


acitve_users_graph:number of edges: 79 /number of nodes: 27 /is connected or not?: True
normal_users_graph:number of edges: 16 /number of nodes: 10 /is connected or not?: False
inactive_users_graph:number of edges: 12 /number of nodes: 10 /is connected or not?: False


In [124]:
#----------------------------------------------------------#
#               1 HOP SubGraph Centrality                  #
#----------------------------------------------------------#
# 4 centrality for 12 networks: 48

neighbors_subgraph_list = [active_in_temp1_graph,inactive_in_temp1_graph,normal_in_temp1_graph,active_in_temp2_graph, inactive_in_temp2_graph,normal_in_temp2_graph,active_in_temp3_graph, inactive_in_temp3_graph,normal_in_temp3_graph,active_in_temp4_graph, inactive_in_temp4_graph,normal_in_temp4_graph,]

Degree_data = []
Eigenvector_data = []
Closeness_data = []
Betweenness_data = []
for graph_index, different_graph in zip(range(1, len(neighbors_subgraph_list)+1), neighbors_subgraph_list):
    Degree_data.append(nx.degree_centrality(different_graph))
    Eigenvector_data.append(nx.eigenvector_centrality_numpy(different_graph))
    Closeness_data.append( nx.closeness_centrality(different_graph))
    Betweenness_data.append( nx.betweenness_centrality(different_graph))
    
print(len(Degree_data), len(Eigenvector_data), len(Closeness_data), len(Betweenness_data))

centrality_data = pd.DataFrame({'Degree': Degree_data})
centrality_data['Eigenvector'] = pd.Series(Eigenvector_data)
centrality_data['Closeness'] = pd.Series(Closeness_data)
centrality_data['Betweenness'] = pd.Series(Betweenness_data)

# a = centrality_data.index[0]

centrality_data = centrality_data.rename(index={centrality_data.index[0]: 'ActiveInTemp1'})
centrality_data = centrality_data.rename(index={centrality_data.index[1]: 'InActiveInTemp1'})
centrality_data = centrality_data.rename(index={centrality_data.index[2]: 'NormalInTemp1'})  
centrality_data = centrality_data.rename(index={centrality_data.index[3]: 'ActiveInTemp2'})
centrality_data = centrality_data.rename(index={centrality_data.index[4]: 'InActiveInTemp2'})
centrality_data = centrality_data.rename(index={centrality_data.index[5]: 'NormalInTemp2'})
centrality_data = centrality_data.rename(index={centrality_data.index[6]: 'ActiveInTemp3'})
centrality_data = centrality_data.rename(index={centrality_data.index[7]: 'InActiveInTemp3'})
centrality_data = centrality_data.rename(index={centrality_data.index[8]: 'NormalInTemp3'})                         
centrality_data = centrality_data.rename(index={centrality_data.index[9]: 'ActiveInTemp4'})
centrality_data = centrality_data.rename(index={centrality_data.index[10]: 'InActiveInTemp4'})
centrality_data = centrality_data.rename(index={centrality_data.index[11]: 'NormalInTemp4'})  
centrality_data.to_csv('3Group4TempCentralityData.csv', sep='\t', encoding='utf-8')

    

12 12 12 12


In [159]:
#----------------------------------------------------------#
#               For 2 HOP                                  #
#----------------------------------------------------------#