In [None]:
import networkx as nx
import numpy as np

from node_embeddings.model import generate_batches, deepWalk

from sklearn.metrics import f1_score
from sklearn.linear_model import LogisticRegression

In [None]:
# Directory containing the BlogCatalog CSV files that load_data() reads
# ('edges.csv' and 'group-edges.csv'). The trailing slash is kept so that
# file names can be appended to it directly.
data_loc = './data/BlogCatalog3/BlogCatalog-dataset/data/'

In [None]:
def load_data(data_dir=None):
    """Load the BlogCatalog graph and one integer label per node.

    Reads ``edges.csv`` (one ``node,node`` pair per line) and
    ``group-edges.csv`` (one ``node,group`` pair per line) from ``data_dir``.
    Raw node ids are re-mapped to contiguous integers in order of first
    appearance in the edge file, and those integers are the node names of the
    returned graph.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the two CSV files. Defaults to the notebook-level
        ``data_loc`` setting.

    Returns
    -------
    dict
        ``{'graph': nx.Graph, 'labels': np.ndarray}`` where ``labels[k]`` is
        ``group - 1`` for the node with index ``k``.

    Raises
    ------
    ValueError
        If a non-blank line does not split into exactly two comma-separated
        fields, or a group id is not an integer.
    KeyError
        If ``group-edges.csv`` names a node that never occurs in ``edges.csv``.

    Notes
    -----
    * Each line of ``group-edges.csv`` overwrites the node's entry, so a node
      listed with several groups keeps only the last group read.
    * Nodes that never occur in ``group-edges.csv`` keep the initial value 0,
      which is the same value stored for group 1.
    """
    import os  # local import so this cell does not depend on the import cell

    if data_dir is None:
        data_dir = data_loc

    node_index = {}  # raw node id (str) -> contiguous integer index
    edgelist = []

    # Read edge pairs; iterate the file lazily instead of loading every line.
    with open(os.path.join(data_dir, 'edges.csv'), 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank / trailing empty lines.
                continue
            src, dst = line.split(',')  # csv
            # setdefault assigns the next free index on first sight of an id.
            src_idx = node_index.setdefault(src, len(node_index))
            dst_idx = node_index.setdefault(dst, len(node_index))
            edgelist.append((src_idx, dst_idx))

    # Create an nx undirected network
    bc = nx.Graph(edgelist)

    print("Number of nodes: ", len(bc))
    print("Number of edges: ", bc.size())

    # Read labels from the (node_id, group) file; group ids are shifted by -1.
    labels = np.zeros((len(bc)), dtype=int)
    with open(os.path.join(data_dir, 'group-edges.csv'), 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            node, group = line.split(',')
            labels[node_index[node]] = int(group) - 1

    bc_dataset = {'graph': bc, 'labels': labels}
    return bc_dataset

# Load the dataset once; later cells read bc_dataset['graph'] and bc_dataset['labels'].
bc_dataset = load_data()

In [None]:
def objective():
    """Unimplemented placeholder; takes no arguments and returns None."""
    # The body used to be the bare expression `_`. That name only exists in an
    # IPython session (last cell output) and raises NameError when this code is
    # run as a plain Python script, so the placeholder is now explicit.
    # TODO: implement, or delete this cell if it is no longer needed.
    return None

In [None]:
import pickle

# Restore the object stored in ./output/deepwalk_gp.pickle into `gp_`.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
with open('./output/deepwalk_gp.pickle', 'rb') as pickle_file:  # pickle requires binary mode
    gp_ = pickle.load(pickle_file)  # deserialize
  


In [None]:
# gp_.x is unpacked positionally into the eight DeepWalk hyperparameters below.
# NOTE(review): presumably this is the best point found by a hyperparameter
# search; confirm the ordering against the code that wrote the pickle.
(walks_per_vertex, walk_length, window_size, embedding_size,
 num_neg, lr, epochs, batch_size) = gp_.x

deepwalk_params = {
    'walks_per_vertex': walks_per_vertex,
    'walk_length': walk_length,
    'window_size': window_size,
    'embedding_size': embedding_size,
    'num_neg': num_neg,
    'lr': lr,
    'epochs': epochs,
    'batch_size': batch_size,
}

# Train on the loaded graph; deepWalk returns the embedding and its loss history.
embedding, loss_history = deepWalk(graph=bc_dataset['graph'], **deepwalk_params)




In [None]:

# Evaluate the embedding: shuffled 50/50 split, logistic regression, macro-F1.
SPLIT_SEED = 0  # fixed seed so the train/test split (and the score) is reproducible

# NOTE(review): assumes `embedding` is a torch tensor whose row k belongs to
# node k of the graph - confirm against deepWalk.
# .cpu() is a no-op for CPU tensors and makes the conversion work for tensors
# that live on a GPU, where .numpy() alone raises.
X = embedding.detach().cpu().numpy()
y = bc_dataset['labels']
assert X.shape[0] == y.shape[0], "embedding rows and labels must have the same length"

# Seeded generator instead of the global np.random state.
shuffle_idx = np.random.default_rng(SPLIT_SEED).permutation(X.shape[0])
half_idx = X.shape[0] // 2

# Apply the same permutation to features and labels so they stay aligned.
X = X[shuffle_idx]
y = y[shuffle_idx]

X_train, X_test = X[:half_idx], X[half_idx:]
y_train, y_test = y[:half_idx], y[half_idx:]

clf = LogisticRegression(random_state=0, max_iter=1000).fit(X_train, y_train)
y_hat = clf.predict(X_test)

# Last expression of the cell: displayed as the cell output.
f1_score(y_test, y_hat, average='macro')
