In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV

In [46]:
class TopKRanker(OneVsRestClassifier):
    def predict(self, X, top_k_list):
        assert X.shape[0] == len(top_k_list)
        probs = np.asarray(super(TopKRanker, self).predict_proba(X))
        all_labels = []
        for i, k in enumerate(top_k_list):
            probs_ = probs[i, :]
            labels = self.classes_[probs_.argsort()[-k:]].tolist()
            all_labels.append(labels)
        return all_labels
    
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, use_bias=False,
       max_iter=100, C=1e9, ic=200, test_with_training_data=True, cv=None):
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    emb = None
    if index is None:
        emb = w
    else:
        for i in index:
            if emb is None:
                emb = w[i]
            else:
                emb += w[i]
        emb /= len(index)
    if use_bias:
        emb[:,-1] = w[2].reshape((-1,))
    if norm:
        emb = normalize(emb)
    x_train, yl_train, x_test, yl_test = graph.get_ids_labels(split=split)
    X_train = [emb[i] for i in x_train]
    Y_train = MultiLabelBinarizer().fit_transform(yl_train)
    if cv:
        learner = TopKRanker(LogisticRegressionCV(fit_intercept=True, cv=cv, 
                                       solver='lbfgs', max_iter=max_iter, 
                                       intercept_scaling=ic))
    else:
        learner = TopKRanker(LogisticRegression())
    predictor = learner.fit(X_train, Y_train)
    if test_with_training_data:
        eval_list = graph.nodes()
        top_k = [len(graph._communities[i]) for i in graph.nodes()]
    else:
        eval_list = x_test
        top_k = [len(i) for i in yl_test]
    X_test = np.array([emb[i] for i in eval_list])
    y_pred = predictor.predict(X_test, top_k)
    y_true = [graph._communities[i] for i in eval_list]
    y_pred = MultiLabelBinarizer().fit_transform(y_pred)
    y_true = MultiLabelBinarizer().fit_transform(y_true)
    print(y_pred.shape)
    print(y_true.shape)
    averages = ["micro", "macro"]
    for average in averages:
        print(exp_id, graph_name, index, split, 'f1', average)
        print(f1_score(y_true,  y_pred, average=average))

In [45]:
la = lg('BC3_deepwalk', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)

KeyError: 7