In [1]:
import numpy as np
import random
import networkx as nx
from matplotlib import pyplot as plt
import pandas as pd
import copy
import os
import sys
import networkx as nx
from scipy.sparse import lil_matrix, coo_matrix
from scipy.io import mmwrite

In [2]:
# Make modules located one directory level up importable.
# "__file__" is a plain string literal here (not the module attribute), so
# abspath() resolves it against the current working directory and dirname()
# then yields that working directory.
current_dir = os.path.dirname(os.path.abspath("__file__"))
parent_entry = f"{current_dir}/../"
sys.path.append(parent_entry)

In [3]:
from setting_param import MakeSample_link_prediction_appeared_InputDir as InputDir
from setting_param import MakeSample_link_prediction_appeared_OutputDir as OutputDir
from setting_param import L
from setting_param import attribute_dim

In [4]:
# Create the output directory tree. os.makedirs(..., exist_ok=True) is used
# instead of os.mkdir so that re-running this cell (e.g. Restart & Run All)
# does not fail with FileExistsError when the directories already exist.
# Existing files inside these directories are left in place.
for _subdir in (
    "",
    "/input/",
    "/input/node_attribute/",
    "/input/adjacency",
    "/label/",
    "/mask/",
):
    os.makedirs(OutputDir + _subdir, exist_ok=True)

In [5]:
# Load the existence table from InputDir; the functions below test its
# entries against 1 per (row, column) pair.
exist_table_path = f"{InputDir}/exist_table.npy"
EXIST_TABLE = np.load(exist_table_path)
# Last expression of the cell: display the table's shape.
EXIST_TABLE.shape

(3859, 68)

In [6]:
# Number of rows of EXIST_TABLE; used below as the side length of every
# square (n_node x n_node) matrix.
n_node = len(EXIST_TABLE)

In [7]:
def ExistNodeList(ts):
    """Return the row indices of EXIST_TABLE whose entry in column `ts` is 1.

    Args:
        ts: Column index into EXIST_TABLE; must be non-negative.

    Returns:
        1-D numpy array of row indices.

    Raises:
        AssertionError: if `ts` is negative. A negative index would otherwise
            be accepted by numpy and silently read a column from the end.
    """
    assert ts >= 0, "ts < 0 [referrence error]"
    column = EXIST_TABLE[:, ts]
    (rows,) = np.where(column == 1)
    return rows

def GetAppearedNodes(ts):
    """Return the set of nodes listed at `ts` but not at `ts - 1`.

    Both node lists come from ExistNodeList, so `ts - 1` must be
    non-negative or ExistNodeList's assertion fails.
    """
    current = set(ExistNodeList(ts))
    previous = set(ExistNodeList(ts-1))
    return current - previous

def GetObservedNodes(ts, L):
    """Return the union of the node lists of the `L` steps ts, ts-1, ..., ts-L+1.

    Args:
        ts: Most recent step of the window.
        L: Number of steps in the window (an empty set is returned for L == 0).

    Returns:
        Set of node indices returned by ExistNodeList for any step in the window.
    """
    return set().union(*(ExistNodeList(ts - back) for back in range(L)))

def GetNodes(ts, L, node_type):
    """Return the set of nodes of the requested category at step `ts`.

    Args:
        ts: Step being classified.
        L: Length of the look-back window passed to GetObservedNodes
            (only used for 'return' and 'new').
        node_type: One of
            'all'    - nodes listed at ts,
            'stay'   - nodes listed at both ts-1 and ts,
            'lost'   - nodes listed at ts-1 but not at ts,
            'return' - nodes that appeared at ts (see GetAppearedNodes) and
                       were observed in the window ending at ts-1,
            'new'    - appeared nodes not observed in that window, united
                       with the 'return' nodes.

    Returns:
        A set of node indices.

    Raises:
        ValueError: if `node_type` is not one of the values above
            (previously this surfaced as an UnboundLocalError).
    """
    if node_type == 'all':
        return set(ExistNodeList(ts))
    if node_type == 'stay':
        return set(ExistNodeList(ts-1)) & set(ExistNodeList(ts))
    if node_type == 'lost':
        return set(ExistNodeList(ts-1)) - set(ExistNodeList(ts))
    if node_type in ('return', 'new'):
        appeared = GetAppearedNodes(ts)
        observed = GetObservedNodes(ts-1, L)
        # Same set as appeared - (appeared - observed).
        returning = appeared & observed
        if node_type == 'return':
            return returning
        # NOTE(review): because the 'return' nodes are merged in, this union
        # equals the full set of appeared nodes. Kept as in the original;
        # confirm that is the intended meaning of 'new' in this notebook.
        return (appeared - observed) | returning
    raise ValueError(
        "unknown node_type %r (expected 'all', 'stay', 'lost', 'return' or 'new')"
        % (node_type,)
    )

In [8]:
def Nx(ts):
    """Load the adjacency matrix saved for step `ts` and return it as a graph.

    The matrix is read from InputDir + '/adjacency' + str(ts) + '.npy'.

    nx.from_numpy_array is used instead of nx.from_numpy_matrix: the latter
    was removed in NetworkX 3.0, while from_numpy_array accepts the ndarray
    returned by np.load and builds the same graph.
    """
    adjacency = np.load(InputDir + '/adjacency' + str(ts) + '.npy')
    return nx.from_numpy_array(adjacency)

def SubNxNew(ts, L):
    """Return a graph built from the edges of Nx(ts) reported for the 'new' nodes.

    The edges are obtained with Graph.edges(nbunch), where nbunch is
    GetNodes(ts, L, 'new').
    """
    graph = Nx(ts)
    new_nodes = GetNodes(ts, L, 'new')
    return nx.Graph(graph.edges(new_nodes))

def SubNxLost(ts, L):
    """Return a graph built from the edges of Nx(ts-1) reported for the 'lost' nodes.

    The edges are taken from the previous step's graph because 'lost' nodes
    (GetNodes(ts, L, 'lost')) are those listed at ts-1 but not at ts.
    """
    previous_graph = Nx(ts-1)
    lost_nodes = GetNodes(ts, L, 'lost')
    return nx.Graph(previous_graph.edges(lost_nodes))

def GetEdges(ts, L, edge_type):
    """Return the edges of the requested category at step `ts`.

    Args:
        ts: Step being classified.
        L: Look-back window length, forwarded to SubNxNew / SubNxLost.
        edge_type: One of
            'all'         - edges of the graph at ts,
            'stay'        - edges present at both ts-1 and ts,
            'appeared'    - edges at ts that are absent at ts-1, excluding
                            the edges of SubNxNew(ts, L),
            'disappeared' - edges at ts-1 that are absent at ts, excluding
                            the edges of SubNxLost(ts, L).

    Returns:
        The graph's edge view for 'all'; the result of the edge-view set
        operations for the other types.

    Raises:
        ValueError: if `edge_type` is not one of the values above. The check
            runs before any file is loaded (previously an unknown type
            raised UnboundLocalError after loading the adjacency matrix).
    """
    if edge_type not in ("all", "stay", "appeared", "disappeared"):
        raise ValueError(
            "unknown edge_type %r (expected 'all', 'stay', 'appeared' or 'disappeared')"
            % (edge_type,)
        )

    G_1 = Nx(ts)
    if edge_type == "all":
        return G_1.edges

    # Every remaining category compares against the previous step.
    G_0 = Nx(ts - 1)
    if edge_type == "stay":
        return G_0.edges & G_1.edges
    if edge_type == "appeared":
        return G_1.edges - G_0.edges - SubNxNew(ts, L).edges
    # edge_type == "disappeared"
    return G_0.edges - G_1.edges - SubNxLost(ts, L).edges

In [9]:
def get_adjacency_matrix(ts, L, edge_type):
    """Return the dense (n_node x n_node) adjacency matrix of the selected edges.

    Args:
        ts: Step whose edges are selected.
        L: Look-back window length, forwarded to GetEdges.
        edge_type: Edge category, forwarded to GetEdges.

    Returns:
        numpy ndarray whose rows and columns are ordered 0 .. n_node-1.
    """
    G = nx.Graph()
    # Register every node index up front. A graph built from an edge list
    # alone lacks isolated nodes, and recent NetworkX releases raise when
    # `nodelist` names a node that is not in the graph.
    G.add_nodes_from(range(n_node))
    G.add_edges_from(GetEdges(ts, L, edge_type))
    # nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array returns
    # a plain ndarray directly, so no np.array(...) conversion is needed.
    return nx.to_numpy_array(G, nodelist=list(range(n_node)))

def get_exist_matrix(ts):
    """Return an (n_node x n_node) matrix marking pairs of nodes present at `ts`.

    Entry [i, j] is 1 when EXIST_TABLE[i, ts] == 1 and EXIST_TABLE[j, ts] == 1,
    otherwise 0. The diagonal is not cleared here.
    """
    present = np.zeros(n_node)
    present[np.where(EXIST_TABLE[:, ts] == 1)[0]] = 1
    # Column vector times row vector broadcasts to the full pairwise product.
    return present[:, np.newaxis] * present[np.newaxis, :]

def NodeAttribute(ts):
    """Load and return the array stored in InputDir for node attributes at step `ts`."""
    attribute_path = f"{InputDir}/node_attribute{ts}.npy"
    return np.load(attribute_path)

In [10]:
def TsSplit(ts, L):
    """Split a window starting at `ts` into training steps and one test step.

    Args:
        ts: First step of the window.
        L: Number of consecutive training steps (must be at least 1).

    Returns:
        (ts_train, ts_test, ts_all) where ts_train is [ts, ..., ts+L-1],
        ts_test is the step right after the last training step, and ts_all
        is a new list holding ts_train followed by ts_test.
    """
    ts_train = [ts + offset for offset in range(L)]
    ts_test = ts_train[-1] + 1
    ts_all = [*ts_train, ts_test]
    return ts_train, ts_test, ts_all

In [11]:
# Write one sample per window start `ts`:
#   input/node_attribute/<ts> : node attributes of the L training steps, concatenated column-wise
#   input/adjacency/<ts>      : 'all' adjacency matrices of the L training steps, concatenated column-wise
#   label/<ts>                : 'appeared' adjacency matrix of the test step
#   mask/<ts>                 : exist matrix (zero diagonal) minus the 'all' adjacency, both at the last training step
for ts in range(L, EXIST_TABLE.shape[1]-L):
    ts_train, ts_test, ts_all = TsSplit(ts, L)
    node_attribute = np.zeros((n_node, attribute_dim * L))
    npy_adjacency_matrix = np.zeros((n_node, n_node * L))
    for idx, ts_ in enumerate(ts_train):
        node_attribute[:, attribute_dim*idx : attribute_dim*(idx+1)] = NodeAttribute(ts_)
        # Keep a handle on the most recent matrix: after the loop it is the
        # adjacency of ts_train[-1], which the mask below needs. This avoids
        # loading and rebuilding the same matrix a second time.
        last_adjacency = get_adjacency_matrix(ts_, L, 'all')
        npy_adjacency_matrix[:, n_node*idx : n_node*(idx+1)] = last_adjacency

    lil_adjacency_matrix = lil_matrix(npy_adjacency_matrix)
    lil_node_attribute = lil_matrix(node_attribute)
    mmwrite(OutputDir + "/input/node_attribute/" + str(ts), lil_node_attribute)
    mmwrite(OutputDir + "/input/adjacency/" + str(ts), lil_adjacency_matrix)
    mmwrite(OutputDir + "/label/" + str(ts), lil_matrix(get_adjacency_matrix(ts_test, L, "appeared")))

    exist_matrix = get_exist_matrix(ts_train[-1])
    # Self-pairs are excluded before subtracting the existing edges.
    np.fill_diagonal(exist_matrix, 0)
    mask = exist_matrix - last_adjacency
    mmwrite(OutputDir + "/mask/" + str(ts), lil_matrix(mask))

In [12]:
# Sanity check per window. First printed line: last training step, sum of the
# mask, sum of the 'appeared' label matrix, sum of the 'all' adjacency matrix.
# Second printed line: sum of mask * adjacency (the overlap between the mask
# and the existing edges).
for ts in range(L, EXIST_TABLE.shape[1]-L):
    ts_train, ts_test, ts_all = TsSplit(ts, L)
    # Built once and reused below; previously the same matrix was
    # recomputed from disk for each of the two print statements.
    adjacency = get_adjacency_matrix(ts_train[-1], L, 'all')
    exist_matrix = get_exist_matrix(ts_train[-1])
    np.fill_diagonal(exist_matrix, 0)
    mask = exist_matrix - adjacency
    appeared = get_adjacency_matrix(ts_test, L, "appeared")
    print(ts_train[-1], mask.sum(), appeared.sum(), adjacency.sum())
    print((mask * adjacency).sum())

5 7386.0 58.0 986.0
0.0
6 7232.0 148.0 958.0
0.0
7 8050.0 216.0 1070.0
0.0
8 8558.0 122.0 1144.0
0.0
9 7400.0 178.0 972.0
0.0
10 8046.0 140.0 1074.0
0.0
11 7552.0 90.0 1004.0
0.0
12 10334.0 190.0 1222.0
0.0
13 12146.0 250.0 1426.0
0.0
14 10944.0 274.0 1266.0
0.0
15 11548.0 300.0 1334.0
0.0
16 10934.0 280.0 1276.0
0.0
17 13612.0 336.0 1394.0
0.0
18 20894.0 446.0 1756.0
0.0
19 26200.0 476.0 1856.0
0.0
20 27148.0 594.0 1922.0
0.0
21 44310.0 696.0 2562.0
0.0
22 43898.0 764.0 2542.0
0.0
23 43490.0 560.0 2520.0
0.0
24 46356.0 796.0 2706.0
0.0
25 52132.0 686.0 2858.0
0.0
26 53468.0 640.0 2938.0
0.0
27 83046.0 1188.0 3684.0
0.0
28 77486.0 1026.0 3454.0
0.0
29 74794.0 1160.0 3326.0
0.0
30 78586.0 954.0 3496.0
0.0
31 88344.0 1060.0 3768.0
0.0
32 95472.0 1330.0 4068.0
0.0
33 95464.0 1214.0 4076.0
0.0
34 91882.0 1234.0 3908.0
0.0
35 97898.0 1150.0 4182.0
0.0
36 100978.0 1318.0 4322.0
0.0
37 107276.0 1332.0 4614.0
0.0
38 105362.0 1410.0 4530.0
0.0
39 119556.0 1402.0 4700.0
0.0
40 139712.0 1470.0 50