In [4]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

In [5]:
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, max_iter=100, C=1e9, ic=500):
    """Fit a logistic-regression node classifier on saved embeddings and print F1 scores.

    The classifier is trained on the nodes returned by
    ``graph.gen_training_community(split)`` and then scored on *every* node in
    ``graph.nodes()``, so the reported scores include the training nodes.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under ``../src/mane/data/``.
    index : sequence of int or None
        ``None`` uses the unpickled object directly as the embedding. Otherwise
        the entries ``w[i]`` for ``i`` in ``index`` are averaged element-wise.
        The default list is never mutated.
    norm : bool
        If true, pass the embedding through ``sklearn.preprocessing.normalize``
        with its default settings.
    split : float
        Forwarded to ``graph.gen_training_community``
        (presumably the training fraction -- confirm in ``graph.py``).
    max_iter, C, ic
        Forwarded to ``LogisticRegression`` as ``max_iter``, ``C`` and
        ``intercept_scaling`` respectively.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(review): pickle.load can execute arbitrary code; only load weight
    # files produced by trusted runs.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    if index is None:
        emb = w
    else:
        if len(index) == 0:
            raise ValueError('index must be None or a non-empty sequence of weight indices')
        # Build the average out-of-place: the previous in-place `+=` / `/=`
        # wrote into w[index[0]], which gave wrong results for repeated
        # indices and raised on integer-typed arrays.
        emb = sum(np.asarray(w[i]) for i in index) / len(index)
    if norm:
        emb = normalize(emb)
    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    # NOTE(review): per the scikit-learn docs intercept_scaling only has an
    # effect with the liblinear solver -- confirm against the installed version.
    predictor = LogisticRegression(C=C, max_iter=max_iter,
                                   n_jobs=-1, intercept_scaling=ic).fit(X, y_train)
    # Materialise the node list once so labels and predictions are aligned,
    # and classify all nodes in a single call instead of one call per node.
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    y_pred = predictor.predict(np.asarray([emb[i] for i in nodes]))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))


In [6]:
# Evaluation excluding training data
def lg_blind(exp_id, graph_name, index=[0], norm=False, split=0.5, max_iter=100, C=1e9, ic=500):
    """Fit a logistic-regression node classifier and print F1 on held-out nodes only.

    The classifier is trained on the nodes returned by
    ``graph.gen_training_community(split)``; scoring uses only the nodes of
    ``graph.nodes()`` that are not among those training ids.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under ``../src/mane/data/``.
    index : sequence of int or None
        ``None`` uses the unpickled object directly as the embedding. Otherwise
        the entries ``w[i]`` for ``i`` in ``index`` are averaged element-wise.
        The default list is never mutated.
    norm : bool
        If true, pass the embedding through ``sklearn.preprocessing.normalize``
        with its default settings.
    split : float
        Forwarded to ``graph.gen_training_community``
        (presumably the training fraction -- confirm in ``graph.py``).
    max_iter, C, ic
        Forwarded to ``LogisticRegression`` as ``max_iter``, ``C`` and
        ``intercept_scaling`` respectively.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(review): pickle.load can execute arbitrary code; only load weight
    # files produced by trusted runs.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    if index is None:
        emb = w
    else:
        if len(index) == 0:
            raise ValueError('index must be None or a non-empty sequence of weight indices')
        # Build the average out-of-place: the previous in-place `+=` / `/=`
        # wrote into w[index[0]], which gave wrong results for repeated
        # indices and raised on integer-typed arrays.
        emb = sum(np.asarray(w[i]) for i in index) / len(index)
    if norm:
        emb = normalize(emb)
    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    # NOTE(review): per the scikit-learn docs intercept_scaling only has an
    # effect with the liblinear solver -- confirm against the installed version.
    predictor = LogisticRegression(C=C, max_iter=max_iter,
                                   n_jobs=-1, intercept_scaling=ic).fit(X, y_train)
    # Set membership avoids rescanning the whole training-id sequence for
    # every node (assumes node ids are hashable -- they are used as keys/indices).
    train_ids = set(xids)
    eval_list = [i for i in graph.nodes() if i not in train_ids]
    y_true = [graph._communities[i] for i in eval_list]
    # One batched predict call instead of one call per node; keep the empty
    # list when nothing is held out so downstream behaviour is unchanged.
    y_pred = predictor.predict(np.asarray([emb[i] for i in eval_list])) if eval_list else []
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))


In [7]:
# Run the evaluation of experiment BC3036 twenty times; every call reloads
# the weights and graph and prints its own macro/micro F1 lines.
for _ in range(20):
    lg(
        exp_id='BC3036',
        graph_name='blogcatalog3',
        index=[0, 1],
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.242404980248
f1_micro (emb):  0.276183087665
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.245604655493
f1_micro (emb):  0.275310318076
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.238308751981
f1_micro (emb):  0.268328161365
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.238822041305
f1_micro (emb):  0.268910007758
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.238173024203
f1_micro (emb):  0.269297905353
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.236483572211
f1_micro (emb):  0.26794026377
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.235506481236
f1_micro (emb):  0.267455391777
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.240548253239
f1_micro (emb):  0.273661753297
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.235452285923
f1_micro (emb):  0.271722265322
Experiment  BC3036   blogcatalog3
f1_macro (emb):  0.236160528821
f1_micro (emb):  0.270461598138
Experiment  BC3036   

In [9]:
# Run the evaluation of the node2vec baseline twenty times; index=None makes
# lg use the unpickled weights object directly as the embedding.
for _ in range(20):
    lg(
        exp_id='BC3_node2vec',
        graph_name='blogcatalog3',
        index=None,
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.15103246111
f1_micro (emb):  0.144103956555
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.143679185549
f1_micro (emb):  0.136539953452
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.148317720769
f1_micro (emb):  0.142067494182
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.143344426648
f1_micro (emb):  0.135667183863
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.143482901655
f1_micro (emb):  0.13828549263
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.144041880598
f1_micro (emb):  0.13983708301
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.141190449462
f1_micro (emb):  0.135376260667
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.132102033806
f1_micro (emb):  0.138770364624
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.153330316498
f1_micro (emb):  0.141291698991
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.14744881

In [10]:
# Same node2vec baseline, twenty runs, but scored with lg_blind, i.e. only on
# nodes that were not part of the training ids.
for _ in range(20):
    lg_blind(
        exp_id='BC3_node2vec',
        graph_name='blogcatalog3',
        index=None,
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0217215666886
f1_micro (emb):  0.0808823529412
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0178215915039
f1_micro (emb):  0.0795278637771
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0177178737444
f1_micro (emb):  0.0739164086687
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0175072573236
f1_micro (emb):  0.0812693498452
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0193490459868
f1_micro (emb):  0.0849458204334
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0213828372942
f1_micro (emb):  0.0791408668731
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0170071113351
f1_micro (emb):  0.0758513931889
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0206169910542
f1_micro (emb):  0.0808823529412
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0185294915201
f1_micro (emb):  0.077786377709
Experiment  BC3_node2vec   blogcatalog3
f1_macr

  'precision', 'predicted', average, warn_for)


Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0207306627107
f1_micro (emb):  0.0789473684211
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0191377344979
f1_micro (emb):  0.0766253869969
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0175173209998
f1_micro (emb):  0.0733359133127
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0174412357105
f1_micro (emb):  0.077399380805
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0155058976021
f1_micro (emb):  0.078366873065
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0219830803936
f1_micro (emb):  0.0795278637771
Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.0232550569843
f1_micro (emb):  0.0816563467492


In [11]:
# Run the evaluation of experiment BC3037 twenty times; every call reloads
# the weights and graph and prints its own macro/micro F1 lines.
for _ in range(20):
    lg(
        exp_id='BC3037',
        graph_name='blogcatalog3',
        index=[0, 1],
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.227577327289
f1_micro (emb):  0.253297129558
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.217357363105
f1_micro (emb):  0.250969743988
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.220909624392
f1_micro (emb):  0.252618308766
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.229685081906
f1_micro (emb):  0.252424359969
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.221577329243
f1_micro (emb):  0.254169899147
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.221293091649
f1_micro (emb):  0.255333591932
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.224068008228
f1_micro (emb):  0.250969743988
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.228838715368
f1_micro (emb):  0.253491078355
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.225331501214
f1_micro (emb):  0.25387897595
Experiment  BC3037   blogcatalog3
f1_macro (emb):  0.222920138981
f1_micro (emb):  0.250387897595
Experiment  BC3037   

In [12]:
# Run the evaluation of experiment BC3038 twenty times; every call reloads
# the weights and graph and prints its own macro/micro F1 lines.
for _ in range(20):
    lg(
        exp_id='BC3038',
        graph_name='blogcatalog3',
        index=[0, 1],
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.260171570916
f1_micro (emb):  0.303335919317
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.254762997858
f1_micro (emb):  0.297323506594
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.25743532723
f1_micro (emb):  0.303238944919
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.267563603303
f1_micro (emb):  0.305081458495
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.269004306021
f1_micro (emb):  0.305081458495
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.255091690154
f1_micro (emb):  0.304402637704
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.260147912723
f1_micro (emb):  0.297323506594
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.258362612271
f1_micro (emb):  0.30702094647
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.259904784634
f1_micro (emb):  0.30236617533
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.259659674525
f1_micro (emb):  0.301299456943
Experiment  BC3038   bl

In [13]:
# Experiment BC3038 again, twenty runs, but scored with lg_blind, i.e. only on
# nodes that were not part of the training ids.
for _ in range(20):
    lg_blind(
        exp_id='BC3038',
        graph_name='blogcatalog3',
        index=[0, 1],
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.125628519768
f1_micro (emb):  0.229295665635


  'precision', 'predicted', average, warn_for)


Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.127501774891
f1_micro (emb):  0.225232198142
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.121203762557
f1_micro (emb):  0.219620743034
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.130837380487
f1_micro (emb):  0.232972136223
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.130565046609
f1_micro (emb):  0.231811145511
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.120236174614
f1_micro (emb):  0.221749226006
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.121177889473
f1_micro (emb):  0.219814241486
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.125788843348
f1_micro (emb):  0.222716718266
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.1270515279
f1_micro (emb):  0.223877708978
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.127296444644
f1_micro (emb):  0.223297213622
Experiment  BC3038   blogcatalog3
f1_macro (emb):  0.125153984021
f1_micro (emb):  0.226780185759
Experiment  BC3038   b

In [14]:
# Run the evaluation of experiment BC3039 twenty times; every call reloads
# the weights and graph and prints its own macro/micro F1 lines.
for _ in range(20):
    lg(
        exp_id='BC3039',
        graph_name='blogcatalog3',
        index=[0, 1],
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.272430076607
f1_micro (emb):  0.309736229635
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.269787050523
f1_micro (emb):  0.308863460047
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.260445899292
f1_micro (emb):  0.305954228084
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.271514152797
f1_micro (emb):  0.305663304888
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.268520384798
f1_micro (emb):  0.307505818464
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.266558209784
f1_micro (emb):  0.307699767261
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.262145830816
f1_micro (emb):  0.305178432894
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.260714467463
f1_micro (emb):  0.308281613654
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.269280996724
f1_micro (emb):  0.307311869666
Experiment  BC3039   blogcatalog3
f1_macro (emb):  0.267863560888
f1_micro (emb):  0.305372381691
Experiment  BC3039  

In [15]:
# Run the evaluation of experiment BC3040 twenty times; every call reloads
# the weights and graph and prints its own macro/micro F1 lines.
for _ in range(20):
    lg(
        exp_id='BC3040',
        graph_name='blogcatalog3',
        index=[0, 1],
        norm=True,
        max_iter=1000,
        C=1e5,
        ic=100,
    )

Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.275524128926
f1_micro (emb):  0.316136539953
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.270397701975
f1_micro (emb):  0.313809154383
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.273105643594
f1_micro (emb):  0.31487587277
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.264758345568
f1_micro (emb):  0.310027152832
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.273808815013
f1_micro (emb):  0.317494181536
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.277764500512
f1_micro (emb):  0.318851823119
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.267394574694
f1_micro (emb):  0.309251357642
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.275343437538
f1_micro (emb):  0.310415050427
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.277832334118
f1_micro (emb):  0.317009309542
Experiment  BC3040   blogcatalog3
f1_macro (emb):  0.276323687384
f1_micro (emb):  0.312742435997
Experiment  BC3040   