In [66]:
import os

import sdne

from neo4j import GraphDatabase
import pandas as pd

# Connection settings for the Neo4j driver. Each value can be overridden with an
# environment variable so that credentials do not have to live in the notebook.
host = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
user = os.environ.get('NEO4J_USER', 'neo4j')
# NOTE(review): the literal fallback keeps the notebook behaving as before when
# NEO4J_PASSWORD is unset; a password committed to source should be rotated and
# this default removed.
password = os.environ.get('NEO4J_PASSWORD', 'wowhi223')
driver = GraphDatabase.driver(host, auth=(user, password))

In [55]:
import torch
def process_nxgraph(graph):
    """Build the two lookup tables that map graph nodes to row indices.

    Args:
        graph: any object whose ``nodes()`` method returns an iterable of
            hashable node identifiers.

    Returns:
        A tuple ``(idx2node, node2idx)``: ``idx2node`` is a list holding the
        nodes in iteration order, and ``node2idx`` maps each node to its
        position in that list.
    """
    idx2node = list(graph.nodes())
    node2idx = {node: position for position, node in enumerate(idx2node)}
    return idx2node, node2idx

In [56]:
class Regularization(torch.nn.Module):
    """Norm penalty over every parameter of a model whose name contains 'weight'.

    Calling the module returns ``gamma * sum(||w||_p)`` over those parameters,
    which can be added to a training loss.
    """

    def __init__(self, model, gamma=0.01, p=2, device="cpu"):
        """Collect the weight parameters of ``model`` and print their names.

        Args:
            model: module whose ``named_parameters()`` are inspected.
            gamma: multiplier applied to the summed norms; must be > 0.
            p: order of the norm passed to ``torch.norm``.
            device: stored on the instance; not otherwise used by this class.

        Raises:
            ValueError: if ``gamma`` is not strictly positive.
        """
        super().__init__()
        if gamma <= 0:
            # Raising keeps the kernel/process alive and reports a real error;
            # the previous print + exit(0) terminated with a success status.
            raise ValueError(
                "param weight_decay can not be <= 0 (got gamma={0})".format(gamma))
        self.model = model
        self.gamma = gamma
        self.p = p
        self.device = device
        self.weight_list = self.get_weight_list(model)
        # get_weight_info only prints and returns None, so this attribute is None.
        self.weight_info = self.get_weight_info(self.weight_list)

    def to(self, device):
        """Move the module to ``device`` and remember it on the instance."""
        super().to(device)
        self.device = device
        return self

    def forward(self, model):
        """Return the penalty for the current weights of ``model``."""
        # Re-collected on every call so the penalty follows the live parameters.
        self.weight_list = self.get_weight_list(model)
        reg_loss = self.regulation_loss(self.weight_list, self.gamma, self.p)
        return reg_loss

    def regulation_loss(self, weight_list, gamma, p=2):
        """Sum the p-norm of each ``(name, tensor)`` pair and scale by ``gamma``.

        With an empty ``weight_list`` the result is the plain number ``0 * gamma``.
        """
        reg_loss = 0
        for _name, w in weight_list:
            reg_loss = reg_loss + torch.norm(w, p=p)
        return reg_loss * gamma

    def get_weight_list(self, model):
        """Return ``(name, parameter)`` pairs whose name contains 'weight'."""
        return [(name, param)
                for name, param in model.named_parameters()
                if 'weight' in name]

    def get_weight_info(self, weight_list):
        """Print the names of the parameters that will be regularised."""
        print("#"*10, "regulations weight", "#"*10)
        for name, _param in weight_list:
            print(name)
        print("#"*25)

In [57]:
import torch
import torch.nn as nn
import numpy as np
import time
from tqdm import tqdm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score, accuracy_score

In [58]:
class GraphBaseModel(nn.Module):
    """Minimal ``nn.Module`` base class for the graph-embedding models in this notebook."""

    def __init__(self):
        super(GraphBaseModel, self).__init__()

    def fit(self):
        """Training hook; this base implementation does nothing and returns None."""
        return None

class TopKRanker(OneVsRestClassifier):
    """One-vs-rest classifier that predicts, per sample, its k most probable classes."""

    def predict(self, X, top_k_list):
        """Return a 0/1 indicator row for each entry of ``top_k_list``.

        Args:
            X: feature matrix forwarded to ``predict_proba``.
            top_k_list: for the i-th sample, how many of its highest-probability
                classes should be marked as predicted.

        Returns:
            ``np.ndarray`` with one row per entry of ``top_k_list``; row ``i``
            has ones in the columns of the ``top_k_list[i]`` largest
            probabilities and zeros elsewhere.
        """
        probs = np.asarray(super().predict_proba(X))
        all_labels = []
        for i, k in enumerate(top_k_list):
            row = probs[i, :]
            ranked = row.argsort()
            # Slice from an explicit start index: ``ranked[-k:]`` with k == 0 is
            # the whole array and would mark every class for a label-less sample.
            top_columns = ranked[max(len(ranked) - k, 0):]
            # Mark by column position rather than by class label so the result
            # lines up with predict_proba's columns even when classes_ is not
            # 0..n-1.
            indicator = np.zeros_like(row)
            indicator[top_columns] = 1
            all_labels.append(indicator)
        return np.asarray(all_labels)

class MultiClassifier(object):
    """Multi-label node classifier that looks node features up in an embedding table."""

    def __init__(self, embeddings, clf):
        """Store the embedding lookup and wrap ``clf`` in a ``TopKRanker``."""
        self.embeddings = embeddings
        self.clf = TopKRanker(clf)
        self.binarizer = MultiLabelBinarizer()

    def fit(self, X, y, y_all):
        """Fit the binarizer on ``y_all`` and the classifier on the nodes in ``X``."""
        self.binarizer.fit(y_all)
        features = [self.embeddings[node] for node in X]
        targets = self.binarizer.transform(y)
        self.clf.fit(features, targets)

    def predict(self, X, top_k_list):
        """Predict label indicators for the nodes in ``X``, ``top_k_list[i]`` labels each."""
        features = np.asarray([self.embeddings[node] for node in X])
        return self.clf.predict(features, top_k_list=top_k_list)

    def evaluate(self, X, y):
        """Score predictions for ``X`` against ``y``; print and return the metrics.

        The number of labels predicted for each node equals the number of true
        labels it has. The returned dict holds the F1 score under the
        "micro", "macro", "samples" and "weighted" averages plus ``'acc'``.
        """
        label_counts = [len(labels) for labels in y]
        y_pred = self.predict(X, label_counts)
        y_true = self.binarizer.transform(y)
        results = {
            average: f1_score(y_true, y_pred, average=average)
            for average in ("micro", "macro", "samples", "weighted")
        }
        results['acc'] = accuracy_score(y_true, y_pred)
        separator = '-------------------'
        print(separator)
        print(results)
        print(separator)
        return results

    def evaluate_hold_out(self, X, y, test_size=0.2, random_state=123):
        """Shuffle, split into train/test, fit on the train part and evaluate on the rest.

        Seeds NumPy's global random generator with ``random_state`` before
        drawing the permutation.
        """
        np.random.seed(random_state)
        n_total = len(X)
        n_train = int((1-test_size) * n_total)
        order = np.random.permutation(np.arange(n_total))
        train_ids, test_ids = order[:n_train], order[n_train:]

        X_train = [X[j] for j in train_ids]
        y_train = [y[j] for j in train_ids]
        X_test = [X[j] for j in test_ids]
        y_test = [y[j] for j in test_ids]

        # The binarizer is fitted on every label set, not only the training ones.
        self.fit(X_train, y_train, y)
        return self.evaluate(X_test, y_test)

In [59]:
import torch
from basemodel import GraphBaseModel
from utils import process_nxgraph
import numpy as np
import scipy.sparse as sparse
from utils import Regularization

In [60]:
class SDNEModel(torch.nn.Module):
    """Fully connected autoencoder whose forward pass returns the SDNE training loss."""

    def __init__(self, input_dim, hidden_layers, alpha, beta, device="cpu"):
        """Build the encoder and its mirrored decoder.

        Args:
            input_dim: width of the input rows (and of the reconstruction).
            hidden_layers: output widths of the successive encoder layers.
            alpha: weight of the Laplacian (first-order) term.
            beta: penalty multiplier for non-zero input entries.
            device: stored on the instance; not used by this class itself.
        """
        super(SDNEModel, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.device = device

        encoder_dims = [input_dim] + list(hidden_layers)
        # The decoder starts at the bottleneck, walks back through all hidden
        # widths except the last one, and ends at the original input width.
        decoder_dims = [encoder_dims[-1]] + list(reversed(hidden_layers[:-1])) + [input_dim]

        # Encoder layers are created before decoder layers.
        self.encoder = self._stack(encoder_dims)
        self.decoder = self._stack(decoder_dims)

    @staticmethod
    def _stack(dims):
        """Chain Linear+ReLU pairs through consecutive widths in ``dims``."""
        modules = []
        for fan_in, fan_out in zip(dims[:-1], dims[1:]):
            modules.append(torch.nn.Linear(fan_in, fan_out))
            modules.append(torch.nn.ReLU())
        return torch.nn.Sequential(*modules)

    def forward(self, A, L):
        """Return ``loss_2nd + loss_1st`` for input rows ``A`` and matrix ``L``.

        ``loss_2nd`` is the row-mean of the squared reconstruction error, with
        the error at non-zero entries of ``A`` multiplied by ``beta``.
        ``loss_1st`` is ``alpha * 2 * trace(Y^T L Y)``, where ``Y`` is the
        encoder output.
        """
        embedding = self.encoder(A)
        reconstruction = self.decoder(embedding)
        penalty = torch.where(A != 0, torch.full_like(A, self.beta), torch.ones_like(A))
        weighted_error = (A - reconstruction) * penalty
        second_order = torch.mean(torch.sum(torch.pow(weighted_error, 2), dim=1))
        smoothness = torch.trace(torch.matmul(torch.matmul(embedding.transpose(0, 1), L), embedding))
        first_order = self.alpha * 2 * smoothness
        return second_order + first_order

In [61]:
class SDNE(GraphBaseModel):
    """Trains an ``SDNEModel`` autoencoder on a graph and exposes node embeddings.

    The graph's adjacency rows are the autoencoder inputs; a Laplacian built
    from the symmetrised adjacency supplies the first-order term of the loss.
    """

    def __init__(self, graph, hidden_layers=None, alpha=1e-5, beta=5, gamma=1e-5, device="cpu"):
        """Index the graph, build the model and the dense training matrices.

        Args:
            graph: object providing ``nodes()``, ``edges()``,
                ``number_of_nodes()``, ``number_of_edges()`` and
                ``graph[u][v]`` edge-attribute lookup.
            hidden_layers: encoder layer widths. ``None`` falls back to
                ``[32, 16]`` (previously ``None`` crashed when the model tried
                to iterate it).
            alpha: weight of the first-order term in ``SDNEModel``.
            beta: non-zero-entry penalty in ``SDNEModel``.
            gamma: weight of the ``Regularization`` term; a falsy value
                disables it.
            device: torch device for the model and the matrices.
        """
        super().__init__()
        if hidden_layers is None:
            hidden_layers = [32, 16]
        self.graph = graph
        self.idx2node, self.node2idx = process_nxgraph(graph)
        self.node_size = graph.number_of_nodes()
        self.edge_size = graph.number_of_edges()
        self.device = device
        # Forward the device and move the model right away so that
        # get_embeddings() also works when fit() has not been called.
        self.sdne = SDNEModel(self.node_size, hidden_layers, alpha, beta, device=device).to(device)
        self.embeddings = {}
        self.gamma = gamma

        adjacency_matrix, laplace_matrix = self.__create_adjacency_laplace_matrix()
        self.adjacency_matrix = torch.from_numpy(adjacency_matrix.toarray()).float().to(self.device)
        self.laplace_matrix = torch.from_numpy(laplace_matrix.toarray()).float().to(self.device)

    def fit(self, batch_size=512, epochs=1, initial_epoch=0, verbose=1):
        """Train with Adam for epochs ``initial_epoch`` .. ``epochs - 1``.

        Args:
            batch_size: number of adjacency rows per optimisation step; values
                larger than the node count are clamped to it.
            epochs: exclusive upper bound of the epoch counter.
            initial_epoch: first epoch index.
            verbose: print one summary line per epoch when > 0.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1, got {0}".format(batch_size))
        num_samples = self.node_size
        self.sdne.to(self.device)
        # __get_embeddings() leaves the model in eval mode; switch back.
        self.sdne.train()
        # Cached embeddings belong to the previous weights; drop them.
        self.embeddings = {}
        optimizer = torch.optim.Adam(self.sdne.parameters())
        regularization = Regularization(self.sdne, gamma=self.gamma) if self.gamma else None

        if batch_size > self.node_size:
            # Report the requested size before clamping it (the message used to
            # be formatted after the overwrite and showed the clamped value).
            print('batch_size({0}) > node_size({1}),set batch_size = {1}'.format(
                batch_size, self.node_size))
        batch_size = max(1, min(batch_size, self.node_size))

        # One code path for full-batch and mini-batch training, so that the
        # regularisation term is applied in both.
        steps_per_epoch = (self.node_size - 1) // batch_size + 1
        for epoch in range(initial_epoch, epochs):
            loss_epoch = 0
            for step in range(steps_per_epoch):
                start = step * batch_size
                stop = min(start + batch_size, self.node_size)
                A_train = self.adjacency_matrix[start:stop, :]
                # Laplacian restricted to the nodes of this batch.
                L_train = self.laplace_matrix[start:stop, start:stop]
                optimizer.zero_grad()
                loss = self.sdne(A_train, L_train)
                if regularization is not None:
                    loss = loss + regularization(self.sdne)
                loss_epoch += loss.item()
                loss.backward()
                optimizer.step()

            if verbose > 0:
                print('Epoch {0}, loss {1} . >>> Epoch {2}/{3}'.format(
                    epoch + 1, round(loss_epoch / num_samples, 4), epoch + 1, epochs))

    def get_embeddings(self):
        """Return ``{node: embedding}``, computing it on first use after training."""
        if not self.embeddings:
            self.__get_embeddings()
        return self.embeddings

    def __get_embeddings(self):
        """Encode every adjacency row and cache the result keyed by node."""
        embeddings = {}
        with torch.no_grad():
            self.sdne.eval()
            embed = self.sdne.encoder(self.adjacency_matrix)
            # .cpu() is required before .numpy() when the model runs on an
            # accelerator; it is a no-op for CPU tensors.
            for i, embedding in enumerate(embed.cpu().numpy()):
                embeddings[self.idx2node[i]] = embedding
        self.embeddings = embeddings

    def __create_adjacency_laplace_matrix(self):
        """Build the sparse adjacency matrix and the Laplacian of its symmetrised form.

        Returns:
            ``(adjacency_matrix, laplace_matrix)``. The adjacency matrix holds
            one entry per edge yielded by ``graph.edges()`` (weight attribute
            ``"weight"``, default 1.0). The Laplacian is ``D - A_sym`` where
            ``A_sym`` contains every edge in both directions and ``D`` is the
            diagonal matrix of its row sums.
        """
        node_size = self.node_size
        node2idx = self.node2idx
        adjacency_matrix_data = []
        adjacency_matrix_row_index = []
        adjacency_matrix_col_index = []
        for v1, v2 in self.graph.edges():
            edge_weight = self.graph[v1][v2].get("weight", 1.0)
            adjacency_matrix_data.append(edge_weight)
            adjacency_matrix_row_index.append(node2idx[v1])
            adjacency_matrix_col_index.append(node2idx[v2])
        # NOTE(review): entries are written only in the direction the edge is
        # yielded; confirm this is intended when `graph` is undirected.
        adjacency_matrix = sparse.csr_matrix((adjacency_matrix_data,
                                              (adjacency_matrix_row_index, adjacency_matrix_col_index)),
                                             shape=(node_size, node_size))
        # Symmetrised copy: each edge is inserted as (row, col) and (col, row).
        adjacency_matrix_ = sparse.csr_matrix((adjacency_matrix_data+adjacency_matrix_data,
                                               (adjacency_matrix_row_index+adjacency_matrix_col_index,
                                                adjacency_matrix_col_index+adjacency_matrix_row_index)),
                                              shape=(node_size, node_size))
        degree_matrix = sparse.diags(adjacency_matrix_.sum(axis=1).flatten().tolist()[0])
        laplace_matrix = degree_matrix - adjacency_matrix_
        return adjacency_matrix, laplace_matrix

In [62]:
from sdne import SDNE
from basemodel import MultiClassifier
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np

In [63]:
def read_node_label(file_path, skip_head=False):
    """Read a whitespace-separated ``node label [label ...]`` file.

    Args:
        file_path: path of the text file to read.
        skip_head: when true, the first line is discarded as a header.

    Returns:
        ``(X, y)`` where ``X`` is the list of node ids (first token of each
        line) and ``y`` the list of label lists (remaining tokens). Blank
        lines are skipped, so a trailing newline no longer produces an
        empty-string node, and runs of spaces or tabs no longer produce
        empty-string labels.
    """
    X, y = [], []
    with open(file_path, "r") as f:
        if skip_head:
            f.readline()
        # Iterate the file object directly instead of loading all lines first.
        for line in f:
            tokens = line.split()
            if not tokens:
                continue
            X.append(tokens[0])
            y.append(tokens[1:])
    return X, y

def plot_embeddings(embeddings, X, y):
    """Project the embeddings of the nodes in ``X`` to 2-D with t-SNE and scatter-plot them.

    Points are grouped by the first label of each node (``y[i][0]``); every
    group becomes one scatter series carrying that label in the legend.
    """
    vectors = np.asarray([embeddings[node] for node in X])
    projected = TSNE(n_components=2).fit_transform(vectors, y)

    # First label -> positions (in X order) of the nodes carrying it.
    members_by_label = {}
    for position in range(len(X)):
        members_by_label.setdefault(y[position][0], []).append(position)

    for label, members in members_by_label.items():
        plt.scatter(projected[members, 0], projected[members, 1], label=label)
    plt.legend()
    plt.show()

# Training demo: embed an edge-list graph with SDNE and prepare a classifier.
if __name__ == '__main__':

    import networkx as nx
    # Read the edge list as a directed graph with an integer 'weight' per edge.
    # NOTE(review): absolute, machine-specific path; the cell only runs where
    # this file exists.
    G = nx.read_edgelist('C:\\Users\\chaehyeon\\Desktop\\Wiki_edgelist.txt',
                         create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])

    # Train the autoencoder (hidden widths 256 and 128) for 20 epochs.
    model = SDNE(G, hidden_layers=[256, 128])
    model.fit(batch_size=1024, epochs=20)
    embeddings = model.get_embeddings()

    X, y = read_node_label('C:\\Users\\chaehyeon\\Desktop\\wiki_labels.txt')
    
    # `model` is rebound here: from this point on it is the classifier wrapper,
    # not the SDNE instance.
    model = MultiClassifier(embeddings, LogisticRegression())

    # The triple-quoted block below is a bare string literal, not code, so the
    # hold-out evaluation and the plot are currently disabled.
    '''model.evaluate_hold_out(X, y)

    plot_embeddings(embeddings, X, y)'''

########## regulations weight ##########
encoder.0.weight
encoder.2.weight
decoder.0.weight
decoder.2.weight
#########################
batch_size(90) > node_size(90),set batch_size = 90
Epoch 1, loss 0.2915 . >>> Epoch 1/20
Epoch 2, loss 0.2868 . >>> Epoch 2/20
Epoch 3, loss 0.2822 . >>> Epoch 3/20
Epoch 4, loss 0.2773 . >>> Epoch 4/20
Epoch 5, loss 0.2719 . >>> Epoch 5/20
Epoch 6, loss 0.2658 . >>> Epoch 6/20
Epoch 7, loss 0.2588 . >>> Epoch 7/20
Epoch 8, loss 0.251 . >>> Epoch 8/20
Epoch 9, loss 0.2424 . >>> Epoch 9/20
Epoch 10, loss 0.2335 . >>> Epoch 10/20
Epoch 11, loss 0.2245 . >>> Epoch 11/20
Epoch 12, loss 0.2165 . >>> Epoch 12/20
Epoch 13, loss 0.2105 . >>> Epoch 13/20
Epoch 14, loss 0.207 . >>> Epoch 14/20
Epoch 15, loss 0.2051 . >>> Epoch 15/20
Epoch 16, loss 0.2031 . >>> Epoch 16/20
Epoch 17, loss 0.1996 . >>> Epoch 17/20
Epoch 18, loss 0.1946 . >>> Epoch 18/20
Epoch 19, loss 0.1896 . >>> Epoch 19/20
Epoch 20, loss 0.1855 . >>> Epoch 20/20


In [64]:
class MNN(nn.Module):
    """Two-layer encoder / two-layer decoder network returning SDNE-style losses."""

    def __init__(self, node_size, nhid0, nhid1, droput, alpha):
        """Create the four linear layers.

        Args:
            node_size: width of an adjacency row (input and output size).
            nhid0: width of the first hidden layer.
            nhid1: width of the embedding layer.
            droput: stored on the instance; not used by the methods shown here.
            alpha: multiplier applied to the second-order loss.
        """
        super(MNN, self).__init__()
        self.encode0 = nn.Linear(node_size, nhid0)
        self.encode1 = nn.Linear(nhid0, nhid1)
        self.decode0 = nn.Linear(nhid1, nhid0)
        self.decode1 = nn.Linear(nhid0, node_size)
        self.droput = droput
        self.alpha = alpha

    def forward(self, adj_batch, adj_mat, b_mat):
        """Return ``(L_1st, alpha * L_2nd, L_1st + alpha * L_2nd)``.

        Args:
            adj_batch: adjacency rows of the batch nodes (network input).
            adj_mat: adjacency among the batch nodes themselves; it multiplies
                the pairwise squared distances of their embeddings.
            b_mat: element-wise penalty applied to the reconstruction error.
        """
        # `nn.functional` is used directly: the notebook never imports an `F`
        # alias, so the previous `F.leaky_relu` calls raised NameError.
        act = nn.functional.leaky_relu
        t0 = act(self.encode0(adj_batch))
        t0 = act(self.encode1(t0))
        embedding = t0
        t0 = act(self.decode0(t0))
        t0 = act(self.decode1(t0))
        # ||e_i||^2 - 2 e_i.e_j + ||e_j||^2 = squared distance between rows i and j.
        embedding_norm = torch.sum(embedding * embedding, dim=1, keepdim=True)
        L_1st = torch.sum(adj_mat * (embedding_norm -
                                     2 * torch.mm(embedding, torch.transpose(embedding, dim0=0, dim1=1))
                                     + torch.transpose(embedding_norm, dim0=0, dim1=1)))
        weighted_error = (adj_batch - t0) * b_mat
        L_2nd = torch.sum(weighted_error * weighted_error)
        return L_1st, self.alpha * L_2nd, L_1st + self.alpha * L_2nd

    def savector(self, adj):
        """Return the output of the two encoder layers for ``adj``.

        NOTE(review): unlike ``forward``, no leaky-ReLU is applied here, so the
        returned vectors are not the ``embedding`` used in the loss; confirm
        this is intended.
        """
        t0 = self.encode0(adj)
        t0 = self.encode1(t0)
        return t0

In [31]:
from data import dataset

ModuleNotFoundError: No module named 'data'

In [32]:
import networkx as nx
import numpy as np
from torch.utils import data
from torch.utils.data import DataLoader
import torch

def Read_graph(file_name):
    """Load an integer edge list into a networkx graph and a dense adjacency tensor.

    Node ids are taken from the first two columns of each row. Ids are used
    as-is when the smallest id is 0; otherwise they are shifted down by one
    before being used as matrix indices.

    Args:
        file_name: path of a text file readable by ``np.loadtxt``.

    Returns:
        ``(G, Adj, Node)``: the undirected ``nx.Graph`` (nodes keep their
        original ids), a symmetric 0/1 ``torch.FloatTensor`` of shape
        ``(Node, Node)``, and the matrix size ``Node``.

    Raises:
        ValueError: if the file holds no edges, has fewer than two columns, or
            contains a negative node id.
    """
    # ndmin=2 keeps a file with a single edge as a (1, k) array instead of a
    # 1-D array, which the row indexing below could not handle.
    edge = np.loadtxt(file_name, ndmin=2).astype(np.int32)
    if edge.shape[0] == 0 or edge.shape[1] < 2:
        raise ValueError("{0} does not contain any 'u v' edge rows".format(file_name))

    # Only the two endpoint columns define node ids; any further column (for
    # example a weight) must not influence min/max.
    endpoints = edge[:, :2]
    min_node, max_node = endpoints.min(), endpoints.max()
    if min_node < 0:
        raise ValueError("negative node id {0} in {1}".format(min_node, file_name))
    offset = 0 if min_node == 0 else 1
    Node = int(max_node) + 1 - offset

    G = nx.Graph()
    for u, v in endpoints:
        G.add_edge(u, v)

    Adj = np.zeros([Node, Node], dtype=np.int32)
    rows = endpoints[:, 0] - offset
    cols = endpoints[:, 1] - offset
    # Fill both triangles so the matrix is symmetric.
    Adj[rows, cols] = 1
    Adj[cols, rows] = 1
    Adj = torch.FloatTensor(Adj)
    return G, Adj, Node

class Dataload(data.Dataset):
    """Dataset whose items are plain node indices ``0 .. Node - 1``.

    A ``DataLoader`` built on it therefore yields batches of indices; callers
    slice the adjacency matrix with those indices themselves.
    """

    def __init__(self, Adj, Node):
        # Adj is kept for reference only; __getitem__ does not read it.
        self.Adj, self.Node = Adj, Node

    def __len__(self):
        return self.Node

    def __getitem__(self, index):
        return index

# Demo: iterate shuffled batches of node indices and build the penalty matrix.
if __name__ == '__main__':
    # Relative path: resolved against the notebook's working directory.
    G, Adj, Node = Read_graph('./karate/karate.edgelist')
    Data = Dataload(Adj, Node)
    # Dataload yields bare indices, so each batch is a tensor of up to 20 node ids.
    Test = DataLoader(Data, batch_size=20, shuffle=True)
    for index in Test:
        # Rows of the adjacency matrix for the nodes in this batch.
        adj_batch = Adj[index]
        # Columns restricted to the same nodes: adjacency within the batch.
        adj_mat = adj_batch[:, index]
        # Penalty matrix: 5 where the batch adjacency is non-zero, 1 elsewhere.
        b_mat = torch.ones_like(adj_batch)
        b_mat[adj_batch != 0] = 5
        print(b_mat)

OSError: ./karate/karate.edgelist not found.