In [122]:
from queue import *
import queue
import networkx as nx
import re

In [123]:
def create_reverse_graph(graph):
    """Invert an adjacency mapping.

    Parameters
    ----------
    graph : dict
        Maps each node to an iterable of the vertices it points to.

    Returns
    -------
    dict
        Maps each vertex that appears in some adjacency list to the list of
        nodes that list it, in the order they are encountered. Nodes that are
        never listed as a neighbour do not get an entry.
    """
    reverse_graph = {}
    for origin, targets in graph.items():
        for target in targets:
            # Create the bucket on first sight, then record the incoming node.
            reverse_graph.setdefault(target, []).append(origin)
    return reverse_graph

In [147]:
def retrive_fist_node(edge, nodes=None):
    """Recover the first endpoint from an edge key built as ``first + second``.

    Edge keys are plain string concatenations of two node names, so a key such
    as ``'105'`` can be split in more than one way when node names share a
    prefix (``'1'`` and ``'10'``). A split whose remainder is itself a known
    node is preferred; only if no such split exists is the first node that is
    a prefix of ``edge`` returned.

    Parameters
    ----------
    edge : str
        Concatenation of two node names.
    nodes : container of str, optional
        Known node names. Defaults to the notebook-level ``graph`` so existing
        single-argument calls keep working.

    Returns
    -------
    str or None
        The chosen first node, or ``None`` when no known node is a prefix of
        ``edge``.
    """
    if nodes is None:
        nodes = graph  # notebook-level adjacency dict
    fallback = None
    for key in nodes:
        if not edge.startswith(key):
            continue
        if edge[len(key):] in nodes:
            # Both halves are real nodes: this is a consistent split.
            return key
        if fallback is None:
            fallback = key
    return fallback

In [124]:
def bfs(G, source):
    """Group the nodes reached by a breadth-first search into levels.

    Walks the edges yielded by ``nx.bfs_edges(G, source)`` and starts a new
    level whenever an edge's parent is not part of the level currently being
    expanded.

    Parameters
    ----------
    G : graph accepted by ``nx.bfs_edges``
    source : node the search starts from

    Returns
    -------
    list of list
        ``[[source], level_1, level_2, ...]``. The final list is empty when
        the search yields no edges at all.
    """
    levels = [[source]]
    frontier = levels[0]   # level whose nodes act as parents right now
    upcoming = []          # children collected for the following level
    for parent, child in nx.bfs_edges(G, source):
        if parent not in frontier:
            # The parent belongs to the level we were collecting: close it
            # and start gathering the one after.
            levels.append(upcoming)
            frontier = upcoming
            upcoming = []
        upcoming.append(child)
    levels.append(upcoming)
    return levels

In [125]:
def get_no_of_shortest_paths(G, graph, reverse_graph, source):
    """Count shortest paths from ``source`` to every node, level by level.

    Parameters
    ----------
    G : graph passed on to ``bfs``
    graph : dict
        Adjacency mapping; its keys are the nodes that start with a count of 0.
    reverse_graph : dict
        Maps a vertex to the nodes that list it as a neighbour.
    source : node the counts are measured from (its own count is 1)

    Returns
    -------
    (dict, list of list)
        The per-node path counts and the BFS levels returned by ``bfs``.
    """
    level_nodes = bfs(G, source)
    no_of_shortest_paths = {node: 0 for node in graph.keys()}
    no_of_shortest_paths[source] = 1
    # Pair every level with the one right before it.
    for previous_level, current_level in zip(level_nodes, level_nodes[1:]):
        for vertex in current_level:
            # A vertex inherits the counts of all its neighbours one level up.
            no_of_shortest_paths[vertex] = sum(
                no_of_shortest_paths[node]
                for node in previous_level
                if node in reverse_graph[vertex]
            )
    return no_of_shortest_paths, level_nodes

In [126]:
def get_flow(flow_on_edges, flow_to_nodes, level_nodes, no_of_shortest_paths, reverse_graph):
    """Push flow from the deepest BFS level back towards the source.

    Every node starts with a flow of 1. Walking the levels bottom-up, each
    node hands its flow to its neighbours in the previous level, split in
    proportion to the number of shortest paths reaching each of them.

    Parameters
    ----------
    flow_on_edges : dict
        Filled in place; keys are ``child + parent`` string concatenations.
    flow_to_nodes : dict
        Filled in place with the flow accumulated at each node.
    level_nodes : list of list
        BFS levels, source level first.
    no_of_shortest_paths : dict
        Shortest-path count per node; its keys define which nodes get an
        initial flow of 1.
    reverse_graph : dict
        Maps a node to the nodes that list it as a neighbour.

    Returns
    -------
    (dict, dict)
        ``flow_on_edges`` and ``flow_to_nodes`` (the same objects passed in).
    """
    # Seed from the argument instead of a notebook-global `graph`, so the
    # function only depends on what it is given.
    for node in no_of_shortest_paths:
        flow_to_nodes[node] = 1
    for depth in range(len(level_nodes) - 1, 0, -1):
        parents = level_nodes[depth - 1]
        for child in level_nodes[depth]:
            for parent in parents:
                if parent in reverse_graph[child]:
                    edge = child + parent
                    share = no_of_shortest_paths[parent] / no_of_shortest_paths[child]
                    flow_on_edges[edge] = share * flow_to_nodes[child]
                    flow_to_nodes[parent] = flow_to_nodes[parent] + flow_on_edges[edge]
    return flow_on_edges, flow_to_nodes

In [179]:
def get_betweeness(graph, G, reverse_graph):
    """Accumulate edge flow over a BFS started from every node.

    Parameters
    ----------
    graph : dict
        Adjacency mapping; a BFS is started from each of its keys.
    G : networkx graph
        Supplies the edge list; each edge ``(u, v)`` gets the key ``u + v``.
    reverse_graph : dict
        Maps a node to the nodes that list it as a neighbour.

    Returns
    -------
    dict
        ``u + v`` -> summed flow rounded to 4 decimals. The flow of every
        source is added as-is (no halving for the two traversal directions).
    """
    edge_list = list(nx.edges(G))
    edge_betweeness = {}
    # Flow keys are written as child + parent, which may be the reverse of the
    # orientation used for the betweenness keys. Resolve them with an exact
    # lookup rather than re-parsing the concatenated names.
    canonical_key = {}
    for first, second in edge_list:
        key = first + second
        edge_betweeness[key] = 0
        canonical_key[key] = key
    for first, second in edge_list:
        # A direct key always wins over a reversed alias of another edge.
        canonical_key.setdefault(second + first, first + second)

    for source in graph.keys():
        no_of_shortest_paths, level_nodes = get_no_of_shortest_paths(G, graph, reverse_graph, source)
        flow_on_edges, _ = get_flow({}, {}, level_nodes, no_of_shortest_paths, reverse_graph)
        for edge, flow in flow_on_edges.items():
            key = canonical_key.get(edge)
            if key is not None:  # flow on something that is not an edge of G is ignored
                edge_betweeness[key] += flow
    for edge in edge_betweeness:
        edge_betweeness[edge] = round(edge_betweeness[edge], 4)
    return edge_betweeness

In [72]:
# Sample undirected graph (the adjacency displayed by the `graph` cell below),
# defined here so this cell also runs on a fresh kernel.
graph = {
    'A': ['B', 'E'],
    'B': ['A', 'F', 'G'],
    'D': ['F', 'G'],
    'E': ['A', 'F'],
    'F': ['B', 'D', 'E'],
    'G': ['B', 'D'],
}
# get_betweeness takes the adjacency dict, its networkx graph and the reverse
# adjacency; the one-argument call no longer matches its signature.
edge_betweeness = get_betweeness(graph, nx.Graph(graph), create_reverse_graph(graph))

In [73]:
# Display the edge-betweenness scores computed by the previous cell.
edge_betweeness

{'AB': 8.0,
 'AE': 5.3333,
 'BF': 7.3333,
 'BG': 8.0,
 'DF': 8.0,
 'DG': 5.3333,
 'EF': 8.0}

In [74]:
# Display the adjacency dict currently bound to `graph`.
graph

{'A': ['B', 'E'],
 'B': ['A', 'F', 'G'],
 'D': ['F', 'G'],
 'E': ['A', 'F'],
 'F': ['B', 'D', 'E'],
 'G': ['B', 'D']}

In [207]:
def _discard_neighbor(adjacency, node, neighbor):
    """Remove every occurrence of ``neighbor`` from ``adjacency[node]`` in place."""
    neighbors = adjacency.get(node)
    if neighbors:
        neighbors[:] = [other for other in neighbors if other != neighbor]


def main(graph, reverse_graph, no_of_required_components):
    """Split a graph into communities by repeatedly cutting the busiest edges.

    While the graph has fewer connected components than requested, the edge
    betweenness is recomputed and every edge tied for the maximum score is
    removed.

    Parameters
    ----------
    graph : dict
        Adjacency mapping of string node names. Modified in place.
    reverse_graph : dict
        Reverse adjacency of ``graph``. Modified in place so that it stays in
        step with ``graph``; otherwise later rounds would still count paths
        through edges that were already removed.
    no_of_required_components : int
        Stop once at least this many connected components exist.

    Returns
    -------
    list of set
        The connected components, also printed one by one. A list (rather than
        a one-shot generator) so the result can be iterated more than once.

    Raises
    ------
    ValueError
        If there are no edges left to remove but the requested number of
        components has not been reached.
    """
    G = nx.Graph(graph)
    while nx.number_connected_components(G) < no_of_required_components:
        if G.number_of_edges() == 0:
            raise ValueError(
                "no edges left to remove; cannot reach "
                + str(no_of_required_components) + " components"
            )
        edge_betweeness = get_betweeness(graph, G, reverse_graph)
        max_betweness = max(edge_betweeness.values())

        # Betweenness keys are `u + v` concatenations of the edges of G. Map
        # them back to their endpoints exactly instead of guessing where one
        # node name ends. Two edges that concatenate to the same string cannot
        # be told apart by key; the first one listed is used.
        endpoints_by_key = {}
        for first, second in nx.edges(G):
            endpoints_by_key.setdefault(first + second, (first, second))

        for edge, score in edge_betweeness.items():
            if score != max_betweness:
                continue
            first_node, second_node = endpoints_by_key[edge]
            # Drop the edge in both directions, from both mappings.
            for adjacency in (graph, reverse_graph):
                _discard_neighbor(adjacency, first_node, second_node)
                _discard_neighbor(adjacency, second_node, first_node)
        G = nx.Graph(graph)

    components = list(nx.connected_components(G))
    for component in components:
        print(component, "\n\n")
    return components

In [205]:
# Six-node demo graph, split into (at least) two communities.
graph = {
    'A': ['B', 'E'],
    'B': ['A', 'F', 'G'],
    'D': ['F', 'G'],
    'E': ['A', 'F'],
    'F': ['B', 'D', 'E'],
    'G': ['B', 'D'],
}
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 2)

{'E', 'A'} 


{'F', 'B'} 


{'D', 'G'} 




In [104]:
# Eight-node demo graph: two dense groups joined through the C-E edge.
graph = {
    'A': ['B', 'C'],
    'B': ['A', 'C', 'D'],
    'D': ['B', 'C'],
    'C': ['B', 'A', 'D', 'E'],
    'E': ['C', 'F', 'G', 'H'],
    'F': ['H', 'E'],
    'G': ['E', 'H'],
    'H': ['E', 'G', 'F'],
}
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 2)

{'D', 'A', 'B', 'C'}
{'E', 'F', 'G', 'H'}


In [120]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_1.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
reverse_graph = create_reverse_graph(graph)
# main() takes the reverse adjacency as its second argument.
main(graph, reverse_graph, 5)

{'40389', '33346', '44629', '37616', '9227', '53682', '33130', '33321', '27334', '50305', '53249', '29661', '19257', '18192', '70877', '76018', '32939', '26511', '60478', '26602', '73366', '30659', '41869', '77612', '27129', '36278', '64955', '45128', '35083', '13341', '30009', '18839', '42056', '45113', '58508', '50312', '50301', '35126', '6062', '45133', '58522', '71218', '76672', '33543', '26880', '11297', '78698', '25444', '27175', '49722', '73105', '40419', '26581', '33563', '26579', '77306', '19289', '12645', '38633', '33400', '38615', '41866', '37025', '64638', '25934', '27122', '34345', '26982', '29697', '79499', '26736', '32958', '7400', '26678', '2790', '5868', '51672', '60874', '4251', '73371', '68856', '54116', '70413', '26714', '67387', '26536', '45280', '35117', '73103', '29706', '45117', '27219', '42373', '54828', '79444', '62413', '29999', '79879', '69260', '29151', '50194', '32950', '34426', '34319', '39347', '76814', '10396', '56169', '23221', '35238', '36309', '27344

In [197]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_map_1.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])

In [198]:
# List the node ids that were read from the edge file into `graph`.
graph.keys()

dict_keys(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '150', '151', '152', '153', '154', '155', '156', 

In [217]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_map_1.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
reverse_graph = create_reverse_graph(graph)
# Keep the result: later cells turn these components into community labels.
connected_components = main(graph, reverse_graph, 6)

{'56', '7', '3', '36', '29', '40', '299', '112', '109', '47', '76', '124', '103', '99', '114', '60', '49', '274', '296', '275', '98', '31', '12', '57', '196', '152', '52', '209', '64', '158', '153', '21', '66', '191', '30', '151', '232', '302', '110', '0', '309', '129', '208', '102', '204', '198', '197', '271', '6', '39', '222', '45', '113', '157', '32', '127', '33', '41', '62', '133', '77', '46', '55', '245', '122', '83', '51', '132', '123', '50', '59', '17', '149', '4', '156', '194', '273', '215', '128', '87', '199', '240', '117', '16', '281', '48', '131', '71', '190', '28', '202', '224', '53', '34', '13', '290', '5', '300', '150', '120', '72', '106', '121', '298', '15', '37', '154', '203', '155', '43', '58', '54', '81', '63', '85', '221', '14', '108', '35', '61', '297', '22', '8', '75', '19', '292', '78', '272', '289', '280', '84', '44', '288', '20', '79', '18', '67', '223', '107', '104', '82', '111', '118', '105', '193', '1', '9', '115', '80', '287', '218', '195', '159', '74', '101

In [185]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_1.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 6)

{'33346', '44629', '37616', '9227', '33321', '27334', '29661', '19257', '32939', '26511', '26602', '41869', '27129', '36278', '13341', '30009', '18839', '42056', '6062', '33543', '26880', '11297', '25444', '27175', '26581', '33563', '26579', '19289', '12645', '38633', '33400', '38615', '41866', '25934', '27122', '34345', '26982', '29697', '26736', '32958', '7400', '26678', '2790', '5868', '4251', '26714', '67387', '26536', '45280', '29706', '27219', '62413', '29999', '29151', '50194', '32950', '34426', '34319', '39347', '10396', '56169', '23221', '36309', '27344', '26790', '21538', '30121', '4746', '32921', '24296', '27138', '58966', '26512', '29971', '29182', '23612', '32909', '2924', '34313', '26670', '281', '37614', '29988', '26472', '39063', '22388', '24796', '3403', '19338', '32954', '33754', '43871', '26871', '26435', '9783', '36990', '26621', '8904', '20795', '36314', '10121', '22222', '37230', '46716', '26583', '38632', '46449', '26937', '52367', '26480', '37005', '30840', '266

In [121]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_2.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 2)

{'1602', '30087', '57392', '20249', '64260', '43736', '70356', '30169', '40510', '1857', '59618', '67114', '77993', '73767', '80178', '11601', '67064', '35041', '24231', '9296', '43782', '2990', '70957', '67270', '17565', '77051', '23006', '20916', '52699', '45138', '22714', '8065', '42452', '4571', '33161', '35092', '19867', '37432', '30639', '58240', '73100', '35723', '41059', '46476', '34375', '79696', '69861', '65122', '23284', '26208', '10297', '73781', '23242', '66922', '8914', '8966', '36793', '79242', '57144', '14059', '63928', '44287', '13410', '7018', '61934', '31639', '22223', '17796', '67258', '41074', '69883', '65041', '19797', '13890', '73800', '49595', '73791', '1826', '5727', '76727', '73793', '32518', '62475', '61984', '67084', '48397', '17569', '60474', '46512', '46479', '20202', '28729', '80023', '9497', '23922', '41054', '63937', '7502', '31164', '9492', '57391', '25666', '925', '76968', '17146', '20492', '78615', '38481', '61930', '49918', '10665', '38925', '60487'

In [173]:
# Six-node demo graph, split into (at least) two communities.
graph = {
    'A': ['B', 'E'],
    'B': ['A', 'F', 'G'],
    'D': ['F', 'G'],
    'E': ['A', 'F'],
    'F': ['B', 'D', 'E'],
    'G': ['B', 'D'],
}
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 2)

{'E', 'A'} 


{'F', 'B'} 


{'D', 'G'} 




In [187]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_2.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 6)

{'1602', '30087', '57392', '20249', '64260', '43736', '70356', '30169', '40510', '1857', '59618', '67114', '77993', '73767', '80178', '11601', '67064', '35041', '24231', '9296', '43782', '2990', '70957', '67270', '17565', '77051', '23006', '20916', '52699', '45138', '22714', '8065', '42452', '4571', '33161', '35092', '19867', '37432', '30639', '58240', '73100', '35723', '41059', '46476', '34375', '79696', '69861', '65122', '23284', '26208', '10297', '73781', '23242', '66922', '8914', '8966', '36793', '79242', '57144', '14059', '63928', '44287', '13410', '7018', '61934', '31639', '22223', '17796', '67258', '41074', '69883', '65041', '19797', '13890', '73800', '49595', '73791', '1826', '5727', '76727', '73793', '32518', '62475', '61984', '67084', '48397', '17569', '60474', '46512', '46479', '20202', '28729', '80023', '9497', '23922', '41054', '63937', '7502', '31164', '9492', '57391', '25666', '925', '76968', '17146', '20492', '78615', '38481', '61930', '49918', '10665', '38925', '60487'

In [241]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_3.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
reverse_graph = create_reverse_graph(graph)
main(graph, reverse_graph, 5)

KeyboardInterrupt: 

In [190]:
# Build an undirected adjacency dict from a "u,v" edge list. The `with` block
# closes the file even if a line fails to parse.
graph = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/edges_sampled_3.csv", 'r') as file:
    for line in file:
        if not line.strip():
            continue  # skip blank lines instead of failing on vertices[1]
        vertices = line.split(",")
        vertices[1] = re.sub('\n', '', vertices[1])
        graph.setdefault(vertices[0], []).append(vertices[1])
        graph.setdefault(vertices[1], []).append(vertices[0])
# How many connected components the graph has before any edge is removed.
G = nx.Graph(graph)
print(nx.number_connected_components(G))

4


In [218]:
# Label every node with the index of the connected component it belongs to.
communities = {}
c_number = 0
for component in connected_components:
    communities.update({member: c_number for member in component})
    c_number += 1

In [219]:
# Display the node -> community index mapping built in the previous cell.
communities

{'0': 0,
 '1': 0,
 '10': 0,
 '100': 0,
 '101': 0,
 '102': 0,
 '103': 0,
 '104': 0,
 '105': 0,
 '106': 0,
 '107': 0,
 '108': 0,
 '109': 0,
 '11': 0,
 '110': 0,
 '111': 0,
 '112': 0,
 '113': 0,
 '114': 0,
 '115': 0,
 '116': 0,
 '117': 0,
 '118': 0,
 '119': 0,
 '12': 0,
 '120': 0,
 '121': 0,
 '122': 0,
 '123': 0,
 '124': 0,
 '125': 0,
 '126': 0,
 '127': 0,
 '128': 0,
 '129': 0,
 '13': 0,
 '130': 0,
 '131': 0,
 '132': 0,
 '133': 0,
 '134': 2,
 '135': 2,
 '136': 2,
 '137': 2,
 '138': 2,
 '139': 2,
 '14': 0,
 '140': 2,
 '141': 2,
 '142': 2,
 '143': 2,
 '144': 2,
 '145': 2,
 '146': 2,
 '147': 2,
 '148': 2,
 '149': 0,
 '15': 0,
 '150': 0,
 '151': 0,
 '152': 0,
 '153': 0,
 '154': 0,
 '155': 0,
 '156': 0,
 '157': 0,
 '158': 0,
 '159': 0,
 '16': 0,
 '160': 2,
 '161': 2,
 '162': 2,
 '163': 2,
 '164': 2,
 '165': 2,
 '166': 2,
 '167': 2,
 '168': 2,
 '169': 2,
 '17': 0,
 '170': 2,
 '171': 2,
 '172': 2,
 '173': 2,
 '174': 2,
 '175': 2,
 '176': 2,
 '177': 2,
 '178': 2,
 '179': 2,
 '18': 0,
 '180': 2,
 

In [240]:
# Write one "node,community" line per node. The `with` block closes (and
# flushes) the file even if a write fails part-way through.
with open("my_membership.csv", 'w') as file:
    for node in communities:
        line = node + "," + str(communities[node]) + "\n"
        file.write(line)

In [226]:
# Read the reference "node,community" file into a dict of strings. The `with`
# block closes the file once reading is done.
prof_community_membership = {}
with open("/Users/droidx/Desktop/Data mining tech/Social mining/community_membership_1.csv", 'r') as file:
    for line in file:
        if line != '\n':  # ignore empty lines
            parts = line.split(",")
            parts[1] = re.sub('\n', '', parts[1])
            prof_community_membership[parts[0]] = parts[1]

In [232]:
# Fraction of nodes whose community label equals the reference label.
# Labels are compared as strings because the reference file is read as text.
matches = sum(
    1
    for node in communities
    if str(communities[node]) == prof_community_membership[node]
)
print("Match percent: " + str(matches/len(communities)))

Match percent: 1.0


In [231]:
# Check the type of a label stored in `communities` (the reference labels are
# read from a file as strings, so the two must be converted before comparing).
type(communities['0'])

int

In [230]:
# Show the reference label recorded for node '0'.
prof_community_membership['0']

'0'

In [216]:
# Print each connected component on its own line.
# NOTE(review): if `connected_components` is a generator that an earlier cell
# already iterated, this loop prints nothing — confirm it is a re-iterable
# collection.
for component in connected_components:
    print(component)