In [45]:
import snap
from os import path
import pickle
import collections
import numpy as np

In [46]:
# Directory that holds every input and output artifact used by this notebook.
BASE_PATH = "../data/stats.stackexchange.com/Bigrams"


def _under_base(filename):
    '''Return the location of `filename` inside BASE_PATH.'''
    return path.join(BASE_PATH, filename)


# Serialized SNAP graphs (read further down with snap.TUNGraph.Load).
FOLDED_NGRAM_GRAPH_PATH = _under_base("Userid_Ngram_Folded_Graph.graph")
FOLDED_POSTID_GRAPH_PATH = _under_base("Postid_Folded_Graph.graph")

# Pickled lookup tables.
NGRAM_DICT_PICKLE = _under_base("Bigramid_Dict")
POSTID_PICKLE = _under_base("STATS_20k-Posts_11-top_bigrams_nostem.pickle")

In [40]:
def get_modularity(G, community_dict):
    '''
    Compute the modularity of a partition of an undirected graph.

    Evaluates

        Q = 1/(2M) * sum over node pairs (i, j) in the same community of
                     (A_ij - k_i * k_j / (2M))

    where M is the number of edges, A_ij is 1 when i and j are adjacent
    (0 otherwise) and k_i is the degree of node i. The pairwise sum is
    regrouped per community c as

        Q = 1/(2M) * sum over c of (A_c - D_c^2 / (2M))

    with A_c the number of ordered adjacent pairs inside c and D_c the total
    degree of c. This yields the same value as looping over every node pair
    but needs only one pass over the nodes and their adjacency lists.

    - G: graph object exposing GetEdges() and Nodes(); each node iterator
      must expose GetId(), GetDeg() and GetNbrNId(k).
    - community_dict: maps node id to community. It must yield a value for
      every node id of G; community labels must be hashable.

    Returns the modularity as a float, or 0.0 when the graph has no edges
    (the formula would otherwise divide by zero).
    '''
    # Use a float so that every division below is a true division; under
    # Python 2 an int here would silently truncate both quotients.
    two_M = 2.0 * G.GetEdges()
    if two_M == 0:
        return 0.0

    # First pass: remember each node's community and accumulate D_c.
    node_community = {}
    degree_total = {}
    for NI in G.Nodes():
        NI_id = NI.GetId()
        label = community_dict[NI_id]
        node_community[NI_id] = label
        degree_total[label] = degree_total.get(label, 0) + NI.GetDeg()

    # Second pass: count ordered adjacent pairs that stay inside a community.
    # Each undirected edge is seen once from each endpoint, which matches the
    # (i, j) and (j, i) terms of the pairwise sum.
    intra_pairs = {}
    for NI in G.Nodes():
        label = node_community[NI.GetId()]
        for k in range(NI.GetDeg()):
            if node_community[NI.GetNbrNId(k)] == label:
                intra_pairs[label] = intra_pairs.get(label, 0) + 1

    mod_sum = 0.0
    for label, deg in degree_total.items():
        mod_sum += intra_pairs.get(label, 0) - (deg * deg) / two_M
    return mod_sum / two_M


In [47]:
f_in = snap.TFIn(FOLDED_POSTID_GRAPH_PATH)
post_graph = snap.TUNGraph.Load(f_in)
print "nodes", post_graph.GetNodes()
print "edges", post_graph.GetEdges()

# labels = snap.TIntStrH()
# for NI in post_graph.Nodes():
#     labels[NI.GetId()] = str(NI.GetId())
# snap.DrawGViz(post_graph, snap.gvlDot, "post-output.png", "Folded Post Graph", labels)

nodes 19687
edges 102305


In [17]:
f_in = snap.TFIn(FOLDED_NGRAM_GRAPH_PATH)
ngram_graph = snap.TUNGraph.Load(f_in)
print "nodes", ngram_graph.GetNodes()
print "edges", ngram_graph.GetEdges()

# labels = snap.TIntStrH()
# for NI in ngram_graph.Nodes():
#     labels[NI.GetId()] = str(NI.GetId())
# snap.DrawGViz(ngram_graph, snap.gvlDot, "ngram-output.png", "Folded Ngram Graph", labels)

nodes 85216
edges 4424686


In [18]:
# Output locations: a text report of the communities and the serialized
# community vector produced below.
COMMUNITIES_PATH = path.join(BASE_PATH, 'postid-communities.txt')
COMMUNITIES_VEC_PATH = path.join(BASE_PATH, 'postid-communities.vector')

# CNM community detection on the folded post graph.
# Sanity check before running it: the graph must contain no self-loops.
assert snap.CntSelfEdges(post_graph) == 0

# CommunityCNM fills comm_vec with the detected communities and returns a
# value that is stored as `modularity`; a later cell writes that value into
# the text report.
comm_vec = snap.TCnComV()
modularity = snap.CommunityCNM(post_graph, comm_vec)

# Persist the community vector so later cells can reload it from disk.
f_out = snap.TFOut(COMMUNITIES_VEC_PATH)
comm_vec.Save(f_out)
f_out.Flush()

In [19]:
# f_in = snap.TFIn(COMMUNITIES_VEC_PATH)
# comm_vec = snap.TCnComV()
# comm_vec.Load(f_in)

# print "communities", len(comm_vec)

# pickle_file = open(NGRAM_DICT_PICKLE, 'rb')
# ngram_dict = pickle.load(pickle_file)
# inverted_dict = dict([[v,k] for k,v in ngram_dict.items()])

# with open(COMMUNITIES_PATH, 'w') as f:
#     for i, comm in enumerate(comm_vec):
#         f.write("#####Community {}#####\n".format(i))
#         for node in comm:
#             f.write(inverted_dict[node] + '\n')
#     f.write("The modularity of the network is {}".format(modularity))

In [48]:
f_in = snap.TFIn(COMMUNITIES_VEC_PATH)
comm_vec = snap.TCnComV()
comm_vec.Load(f_in)

print "communities", len(comm_vec)

pickle_file = open(POSTID_PICKLE, 'rb')
postid_dict = pickle.load(pickle_file)

community_dict = collections.defaultdict(int)

with open(COMMUNITIES_PATH, 'w') as f:
    for i, comm in enumerate(comm_vec):
        f.write("#####Community {}#####\n".format(i))
        for node in comm:
            f.write(str(postid_dict[node]) + '\n')
            community_dict[node] = i
    f.write("The modularity of the network is {}\n".format(modularity))
    alt_modularity = get_modularity(post_graph, community_dict)
    f.write("Alternate modularity (sanity check) is {}".format(alt_modularity))

communities 9951


In [21]:
# modularity of communities
f_in = snap.TFIn(COMMUNITIES_VEC_PATH)
comm_vec = snap.TCnComV()
comm_vec.Load(f_in)

for i, comm in enumerate(comm_vec):
    community = snap.TIntV()
    for node in comm:
        community.Add(node)
    print 'Community {} modularity: {}'.format(i, snap.GetModularity(post_graph, community, post_graph.GetEdges()))

Community 0 modularity: 0.113044267539
Community 1 modularity: 0.00293120260465
Community 2 modularity: 0.0
Community 3 modularity: 0.0693691710983
Community 4 modularity: 0.0
Community 5 modularity: 0.00277769006913
Community 6 modularity: 0.000420017511372
Community 7 modularity: 0.0277439061211
Community 8 modularity: 0.0718596828657
Community 9 modularity: 0.0
Community 10 modularity: 0.0
Community 11 modularity: 0.0
Community 12 modularity: 0.0902974058288
Community 13 modularity: 9.77459777437e-06
Community 14 modularity: 0.0290542240877
Community 15 modularity: 0.0
Community 16 modularity: 0.0
Community 17 modularity: 0.0157018021157
Community 18 modularity: 4.88705763699e-05
Community 19 modularity: 0.0
Community 20 modularity: 0.0
Community 21 modularity: 0.0
Community 22 modularity: 0.0
Community 23 modularity: 0.0648875217211
Community 24 modularity: 0.0
Community 25 modularity: 0.0
Community 26 modularity: 0.00372251182503
Community 27 modularity: 0.0
Community 28 modularit

Community 389 modularity: 0.0
Community 390 modularity: 0.0
Community 391 modularity: 0.0
Community 392 modularity: 0.0
Community 393 modularity: 0.0
Community 394 modularity: 0.0
Community 395 modularity: 0.0
Community 396 modularity: 0.0
Community 397 modularity: 0.0
Community 398 modularity: 0.0
Community 399 modularity: 0.0
Community 400 modularity: 0.0
Community 401 modularity: 0.0
Community 402 modularity: 0.0
Community 403 modularity: 0.0
Community 404 modularity: 0.0
Community 405 modularity: 0.0
Community 406 modularity: 0.0
Community 407 modularity: 0.0
Community 408 modularity: 0.0
Community 409 modularity: 0.0
Community 410 modularity: 0.0
Community 411 modularity: 0.0
Community 412 modularity: 0.0
Community 413 modularity: 0.0
Community 414 modularity: 0.0
Community 415 modularity: 0.0
Community 416 modularity: 0.0
Community 417 modularity: 0.0
Community 418 modularity: 9.77459777437e-06
Community 419 modularity: 0.0
Community 420 modularity: 0.0
Community 421 modularity: 

Community 1061 modularity: 0.0
Community 1062 modularity: 0.0
Community 1063 modularity: 9.77459777437e-06
Community 1064 modularity: 0.0
Community 1065 modularity: 0.0
Community 1066 modularity: 0.0
Community 1067 modularity: 0.0
Community 1068 modularity: 9.77459777437e-06
Community 1069 modularity: 0.0
Community 1070 modularity: 0.0
Community 1071 modularity: 0.0
Community 1072 modularity: 0.0
Community 1073 modularity: 0.0
Community 1074 modularity: 0.0
Community 1075 modularity: 0.0
Community 1076 modularity: 0.0
Community 1077 modularity: 0.0
Community 1078 modularity: 0.0
Community 1079 modularity: 0.0
Community 1080 modularity: 0.0
Community 1081 modularity: 0.0
Community 1082 modularity: 0.0
Community 1083 modularity: 0.0
Community 1084 modularity: 0.0
Community 1085 modularity: 0.0
Community 1086 modularity: 0.0
Community 1087 modularity: 0.0
Community 1088 modularity: 0.0
Community 1089 modularity: 0.0
Community 1090 modularity: 0.0
Community 1091 modularity: 0.0
Community 1

Community 1409 modularity: 0.0
Community 1410 modularity: 0.0
Community 1411 modularity: 0.0
Community 1412 modularity: 0.0
Community 1413 modularity: 9.77459777437e-06
Community 1414 modularity: 0.0
Community 1415 modularity: 0.0
Community 1416 modularity: 0.0
Community 1417 modularity: 0.0
Community 1418 modularity: 0.0
Community 1419 modularity: 0.0
Community 1420 modularity: 0.0
Community 1421 modularity: 0.0
Community 1422 modularity: 9.77459777437e-06
Community 1423 modularity: 0.0
Community 1424 modularity: 0.0
Community 1425 modularity: 0.0
Community 1426 modularity: 0.0
Community 1427 modularity: 0.0
Community 1428 modularity: 9.77459777437e-06
Community 1429 modularity: 0.0
Community 1430 modularity: 0.0
Community 1431 modularity: 0.0
Community 1432 modularity: 0.0
Community 1433 modularity: 0.0
Community 1434 modularity: 0.0
Community 1435 modularity: 0.0
Community 1436 modularity: 0.0
Community 1437 modularity: 0.0
Community 1438 modularity: 0.0
Community 1439 modularity: 0

Community 2320 modularity: 0.0
Community 2321 modularity: 0.0
Community 2322 modularity: 0.0
Community 2323 modularity: 0.0
Community 2324 modularity: 0.0
Community 2325 modularity: 0.0
Community 2326 modularity: 0.0
Community 2327 modularity: 0.0
Community 2328 modularity: 0.0
Community 2329 modularity: 0.0
Community 2330 modularity: 0.0
Community 2331 modularity: 0.0
Community 2332 modularity: 0.0
Community 2333 modularity: 9.77459777437e-06
Community 2334 modularity: 9.77459777437e-06
Community 2335 modularity: 0.0
Community 2336 modularity: 0.0
Community 2337 modularity: 9.77459777437e-06
Community 2338 modularity: 0.0
Community 2339 modularity: 0.0
Community 2340 modularity: 0.0
Community 2341 modularity: 0.0
Community 2342 modularity: 0.0
Community 2343 modularity: 0.0
Community 2344 modularity: 0.0
Community 2345 modularity: 0.0
Community 2346 modularity: 0.0
Community 2347 modularity: 0.0
Community 2348 modularity: 0.0
Community 2349 modularity: 0.0
Community 2350 modularity: 0

Community 2923 modularity: 0.0
Community 2924 modularity: 0.0
Community 2925 modularity: 0.0
Community 2926 modularity: 0.0
Community 2927 modularity: 0.0
Community 2928 modularity: 0.0
Community 2929 modularity: 0.0
Community 2930 modularity: 0.0
Community 2931 modularity: 9.77459777437e-06
Community 2932 modularity: 0.0
Community 2933 modularity: 9.77459777437e-06
Community 2934 modularity: 9.77459777437e-06
Community 2935 modularity: 0.0
Community 2936 modularity: 0.0
Community 2937 modularity: 0.0
Community 2938 modularity: 0.0
Community 2939 modularity: 0.0
Community 2940 modularity: 0.0
Community 2941 modularity: 0.0
Community 2942 modularity: 9.7737378727e-05
Community 2943 modularity: 0.0
Community 2944 modularity: 0.0
Community 2945 modularity: 0.0
Community 2946 modularity: 0.0
Community 2947 modularity: 0.0
Community 2948 modularity: 0.0
Community 2949 modularity: 0.0
Community 2950 modularity: 0.0
Community 2951 modularity: 0.0
Community 2952 modularity: 0.0
Community 2953 

Community 3653 modularity: 0.0
Community 3654 modularity: 0.0
Community 3655 modularity: 0.0
Community 3656 modularity: 0.0
Community 3657 modularity: 0.0
Community 3658 modularity: 0.0
Community 3659 modularity: 0.0
Community 3660 modularity: 9.77459777437e-06
Community 3661 modularity: 0.0
Community 3662 modularity: 0.0
Community 3663 modularity: 0.0
Community 3664 modularity: 0.0
Community 3665 modularity: 0.0
Community 3666 modularity: 0.0
Community 3667 modularity: 0.0
Community 3668 modularity: 0.0
Community 3669 modularity: 0.0
Community 3670 modularity: 0.0
Community 3671 modularity: 0.0
Community 3672 modularity: 0.0
Community 3673 modularity: 0.0
Community 3674 modularity: 0.0
Community 3675 modularity: 0.0
Community 3676 modularity: 0.0
Community 3677 modularity: 0.0
Community 3678 modularity: 0.000146597445188
Community 3679 modularity: 0.0
Community 3680 modularity: 0.0
Community 3681 modularity: 0.0
Community 3682 modularity: 0.0
Community 3683 modularity: 0.0
Community 3

Community 4171 modularity: 0.0
Community 4172 modularity: 0.0
Community 4173 modularity: 0.0
Community 4174 modularity: 0.0
Community 4175 modularity: 0.0
Community 4176 modularity: 0.0
Community 4177 modularity: 0.0
Community 4178 modularity: 0.0
Community 4179 modularity: 0.0
Community 4180 modularity: 0.0
Community 4181 modularity: 0.0
Community 4182 modularity: 0.0
Community 4183 modularity: 0.0
Community 4184 modularity: 0.0
Community 4185 modularity: 0.0
Community 4186 modularity: 0.0
Community 4187 modularity: 0.0
Community 4188 modularity: 9.77459777437e-06
Community 4189 modularity: 0.0
Community 4190 modularity: 0.0
Community 4191 modularity: 0.0
Community 4192 modularity: 0.0
Community 4193 modularity: 0.0
Community 4194 modularity: 9.77459777437e-06
Community 4195 modularity: 0.0
Community 4196 modularity: 0.0
Community 4197 modularity: 0.0
Community 4198 modularity: 0.0
Community 4199 modularity: 0.0
Community 4200 modularity: 0.0
Community 4201 modularity: 0.0
Community 4

Community 4977 modularity: 0.0
Community 4978 modularity: 0.0
Community 4979 modularity: 0.0
Community 4980 modularity: 0.0
Community 4981 modularity: 0.0
Community 4982 modularity: 0.0
Community 4983 modularity: 0.0
Community 4984 modularity: 0.0
Community 4985 modularity: 0.0
Community 4986 modularity: 0.0
Community 4987 modularity: 0.0
Community 4988 modularity: 0.0
Community 4989 modularity: 0.0
Community 4990 modularity: 0.0
Community 4991 modularity: 0.0
Community 4992 modularity: 0.0
Community 4993 modularity: 0.0
Community 4994 modularity: 0.0
Community 4995 modularity: 0.0
Community 4996 modularity: 0.0
Community 4997 modularity: 0.0
Community 4998 modularity: 0.0
Community 4999 modularity: 0.0
Community 5000 modularity: 9.77459777437e-06
Community 5001 modularity: 0.0
Community 5002 modularity: 0.0
Community 5003 modularity: 0.0
Community 5004 modularity: 0.0
Community 5005 modularity: 0.0
Community 5006 modularity: 0.0
Community 5007 modularity: 0.0
Community 5008 modularity

Community 5540 modularity: 0.0
Community 5541 modularity: 0.0
Community 5542 modularity: 0.0
Community 5543 modularity: 0.0
Community 5544 modularity: 0.0
Community 5545 modularity: 0.0
Community 5546 modularity: 0.0
Community 5547 modularity: 0.0
Community 5548 modularity: 0.0
Community 5549 modularity: 0.0
Community 5550 modularity: 0.0
Community 5551 modularity: 0.0
Community 5552 modularity: 0.0
Community 5553 modularity: 0.0
Community 5554 modularity: 0.0
Community 5555 modularity: 0.0
Community 5556 modularity: 0.0
Community 5557 modularity: 0.0
Community 5558 modularity: 0.0
Community 5559 modularity: 0.0
Community 5560 modularity: 0.0
Community 5561 modularity: 0.0
Community 5562 modularity: 0.0
Community 5563 modularity: 9.77459777437e-06
Community 5564 modularity: 0.0
Community 5565 modularity: 0.0
Community 5566 modularity: 0.0
Community 5567 modularity: 0.0
Community 5568 modularity: 0.0
Community 5569 modularity: 0.0
Community 5570 modularity: 0.0
Community 5571 modularity

Community 6400 modularity: 0.0
Community 6401 modularity: 0.0
Community 6402 modularity: 0.0
Community 6403 modularity: 0.0
Community 6404 modularity: 0.0
Community 6405 modularity: 0.0
Community 6406 modularity: 0.0
Community 6407 modularity: 0.0
Community 6408 modularity: 0.0
Community 6409 modularity: 0.0
Community 6410 modularity: 0.0
Community 6411 modularity: 0.0
Community 6412 modularity: 0.0
Community 6413 modularity: 0.0
Community 6414 modularity: 0.0
Community 6415 modularity: 0.0
Community 6416 modularity: 0.0
Community 6417 modularity: 0.0
Community 6418 modularity: 0.0
Community 6419 modularity: 0.0
Community 6420 modularity: 0.0
Community 6421 modularity: 9.77459777437e-06
Community 6422 modularity: 0.0
Community 6423 modularity: 0.0
Community 6424 modularity: 0.0
Community 6425 modularity: 0.0
Community 6426 modularity: 0.0
Community 6427 modularity: 0.0
Community 6428 modularity: 0.0
Community 6429 modularity: 0.0
Community 6430 modularity: 0.0
Community 6431 modularity

Community 6849 modularity: 0.0
Community 6850 modularity: 0.0
Community 6851 modularity: 0.0
Community 6852 modularity: 0.0
Community 6853 modularity: 0.0
Community 6854 modularity: 0.0
Community 6855 modularity: 0.0
Community 6856 modularity: 0.0
Community 6857 modularity: 0.0
Community 6858 modularity: 0.0
Community 6859 modularity: 0.0
Community 6860 modularity: 0.0
Community 6861 modularity: 0.0
Community 6862 modularity: 9.77459777437e-06
Community 6863 modularity: 0.0
Community 6864 modularity: 0.0
Community 6865 modularity: 0.0
Community 6866 modularity: 0.0
Community 6867 modularity: 0.0
Community 6868 modularity: 0.0
Community 6869 modularity: 0.0
Community 6870 modularity: 0.0
Community 6871 modularity: 0.0
Community 6872 modularity: 0.0
Community 6873 modularity: 0.0
Community 6874 modularity: 0.0
Community 6875 modularity: 0.0
Community 6876 modularity: 0.0
Community 6877 modularity: 0.0
Community 6878 modularity: 0.0
Community 6879 modularity: 0.0
Community 6880 modularity

Community 7601 modularity: 0.0
Community 7602 modularity: 0.0
Community 7603 modularity: 0.0
Community 7604 modularity: 0.0
Community 7605 modularity: 0.0
Community 7606 modularity: 0.0
Community 7607 modularity: 0.0
Community 7608 modularity: 0.0
Community 7609 modularity: 0.0
Community 7610 modularity: 0.0
Community 7611 modularity: 0.0
Community 7612 modularity: 0.0
Community 7613 modularity: 0.0
Community 7614 modularity: 0.0
Community 7615 modularity: 0.0
Community 7616 modularity: 0.0
Community 7617 modularity: 0.0
Community 7618 modularity: 0.0
Community 7619 modularity: 0.0
Community 7620 modularity: 0.0
Community 7621 modularity: 0.0
Community 7622 modularity: 0.0
Community 7623 modularity: 0.0
Community 7624 modularity: 0.0
Community 7625 modularity: 0.0
Community 7626 modularity: 0.0
Community 7627 modularity: 0.0
Community 7628 modularity: 0.0
Community 7629 modularity: 0.0
Community 7630 modularity: 0.0
Community 7631 modularity: 0.0
Community 7632 modularity: 0.0
Communit

Community 8204 modularity: 0.0
Community 8205 modularity: 0.0
Community 8206 modularity: 0.0
Community 8207 modularity: 0.0
Community 8208 modularity: 0.0
Community 8209 modularity: 0.0
Community 8210 modularity: 0.0
Community 8211 modularity: 0.0
Community 8212 modularity: 0.0
Community 8213 modularity: 0.0
Community 8214 modularity: 0.0
Community 8215 modularity: 0.0
Community 8216 modularity: 0.0
Community 8217 modularity: 0.0
Community 8218 modularity: 0.0
Community 8219 modularity: 0.0
Community 8220 modularity: 0.0
Community 8221 modularity: 0.0
Community 8222 modularity: 0.0
Community 8223 modularity: 0.0
Community 8224 modularity: 0.0
Community 8225 modularity: 0.0
Community 8226 modularity: 0.0
Community 8227 modularity: 0.0
Community 8228 modularity: 0.0
Community 8229 modularity: 9.77459777437e-06
Community 8230 modularity: 0.0
Community 8231 modularity: 0.0
Community 8232 modularity: 0.0
Community 8233 modularity: 0.0
Community 8234 modularity: 0.0
Community 8235 modularity

Community 8940 modularity: 0.0
Community 8941 modularity: 0.0
Community 8942 modularity: 0.0
Community 8943 modularity: 0.0
Community 8944 modularity: 0.0
Community 8945 modularity: 0.0
Community 8946 modularity: 0.0
Community 8947 modularity: 0.0
Community 8948 modularity: 0.0
Community 8949 modularity: 0.0
Community 8950 modularity: 0.0
Community 8951 modularity: 0.0
Community 8952 modularity: 0.0
Community 8953 modularity: 0.0
Community 8954 modularity: 0.0
Community 8955 modularity: 0.0
Community 8956 modularity: 0.0
Community 8957 modularity: 0.0
Community 8958 modularity: 0.0
Community 8959 modularity: 0.0
Community 8960 modularity: 0.0
Community 8961 modularity: 0.0
Community 8962 modularity: 0.0
Community 8963 modularity: 0.0
Community 8964 modularity: 0.0
Community 8965 modularity: 0.0
Community 8966 modularity: 0.0
Community 8967 modularity: 0.0
Community 8968 modularity: 0.0
Community 8969 modularity: 0.0
Community 8970 modularity: 0.0
Community 8971 modularity: 0.0
Communit

Community 9305 modularity: 0.0
Community 9306 modularity: 0.0
Community 9307 modularity: 0.0
Community 9308 modularity: 0.0
Community 9309 modularity: 0.0
Community 9310 modularity: 0.0
Community 9311 modularity: 0.0
Community 9312 modularity: 0.0
Community 9313 modularity: 0.0
Community 9314 modularity: 0.0
Community 9315 modularity: 0.0
Community 9316 modularity: 0.0
Community 9317 modularity: 0.0
Community 9318 modularity: 0.0
Community 9319 modularity: 0.0
Community 9320 modularity: 0.0
Community 9321 modularity: 0.0
Community 9322 modularity: 0.0
Community 9323 modularity: 0.0
Community 9324 modularity: 0.0
Community 9325 modularity: 0.0
Community 9326 modularity: 0.0
Community 9327 modularity: 0.0
Community 9328 modularity: 0.0
Community 9329 modularity: 0.0
Community 9330 modularity: 0.0
Community 9331 modularity: 0.0
Community 9332 modularity: 0.0
Community 9333 modularity: 0.0
Community 9334 modularity: 0.0
Community 9335 modularity: 0.0
Community 9336 modularity: 0.0
Communit