In [1]:
import networkx as nx
import numpy as np

In [2]:
import keras 
import os

zip_file = keras.utils.get_file(
    fname="cora.tgz",
    origin="https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz",
    extract=True,
)
data_dir = os.path.join(os.path.dirname(zip_file), "cora")

2024-03-10 00:57:23.168965: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-10 00:57:23.170593: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-10 00:57:23.201193: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-10 00:57:23.201956: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import pandas as pd

citations = pd.read_csv(
    os.path.join(data_dir, "cora.cites"),
    sep="\t",
    header=None,
    names=["target", "source"],
)
print("Citations shape:", citations.shape)


Citations shape: (5429, 2)


In [4]:
citations

Unnamed: 0,target,source
0,35,1033
1,35,103482
2,35,103515
3,35,1050679
4,35,1103960
...,...,...
5424,853116,19621
5425,853116,853155
5426,853118,1140289
5427,853155,853118


In [5]:
column_names = ["paper_id"] + [f"term_{idx}" for idx in range(1433)] + ["subject"]
papers = pd.read_csv(
    os.path.join(data_dir, "cora.content"), sep="\t", header=None, names=column_names,
)
print("Papers shape:", papers.shape)

Papers shape: (2708, 1435)


In [6]:
class_values = sorted(papers["subject"].unique())
class_idx = {name: id for id, name in enumerate(class_values)}
paper_idx = {name: idx for idx, name in enumerate(sorted(papers["paper_id"].unique()))}

papers["paper_id"] = papers["paper_id"].apply(lambda name: paper_idx[name])
citations["source"] = citations["source"].apply(lambda name: paper_idx[name])
citations["target"] = citations["target"].apply(lambda name: paper_idx[name])
papers["subject"] = papers["subject"].apply(lambda value: class_idx[value])

In [7]:
papers

Unnamed: 0,paper_id,term_0,term_1,term_2,term_3,term_4,term_5,term_6,term_7,term_8,...,term_1424,term_1425,term_1426,term_1427,term_1428,term_1429,term_1430,term_1431,term_1432,subject
0,462,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,2
1,1911,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,5
2,2002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
3,248,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,4
4,519,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2703,2370,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2704,2371,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2705,2372,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2706,955,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
cora_graph = nx.from_pandas_edgelist(citations.sample(n=1500))


In [10]:
cora_graph.nodes()

NodeView((2047, 996, 341, 236, 140, 355, 1621, 747, 2290, 464, 2121, 121, 370, 371, 2299, 604, 2168, 781, 1901, 87, 2150, 108, 2575, 333, 1865, 1559, 2441, 1703, 2403, 20, 198, 1102, 1930, 270, 1916, 1081, 65, 286, 1476, 149, 368, 122, 2357, 71, 1247, 15, 2597, 749, 2294, 49, 1909, 504, 1983, 742, 2677, 1582, 861, 703, 2684, 1664, 2443, 1708, 220, 230, 2278, 385, 312, 27, 2631, 549, 2370, 1066, 1536, 1537, 936, 411, 227, 226, 301, 3, 2177, 416, 444, 197, 2226, 1162, 1672, 1210, 1173, 351, 613, 173, 2046, 415, 995, 1040, 1731, 1722, 244, 455, 1525, 1112, 627, 728, 1766, 686, 2069, 1009, 251, 2174, 394, 1265, 50, 1789, 1398, 1609, 576, 838, 1727, 348, 1357, 2198, 1392, 2061, 434, 2243, 823, 1122, 237, 177, 2702, 0, 2249, 878, 2375, 399, 710, 222, 1151, 1150, 90, 200, 841, 2615, 453, 388, 340, 1592, 585, 484, 792, 1012, 101, 1362, 13, 326, 1871, 1872, 791, 485, 1576, 1580, 1195, 2025, 165, 860, 2541, 33, 171, 397, 2277, 448, 1219, 495, 556, 367, 373, 47, 109, 2104, 894, 414, 2, 1601, 780,

In [11]:
G=cora_graph

In [12]:
adj = (1.0 * (nx.adjacency_matrix(G)>0)).toarray()

In [13]:
import tensorflow as tf

In [14]:
from keras import backend as K 
from keras.layers import Layer

In [15]:
from functools import cached_property

class GraphConvolution(Layer):
    """Basic graph convolution layer for undirected graph without edge labels."""
    
    def __init__(self, output_dim, graph, activation, **kwargs):    
        self.output_dim = output_dim 
        self.graph = graph
        self.activation = activation
        super(GraphConvolution, self).__init__(**kwargs)

    @staticmethod
    def preprocess_graph(adj):
        adj_ = adj + np.eye(adj.shape[0])
        
        degree = np.array(adj_.sum(1))
        
        degree_mat_inv_sqrt = np.diag(np.power(degree, -0.5).flatten())
        adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt)
    
        return adj_normalized

    
    @cached_property
    def adj(self):
        return (1.0 * (nx.adjacency_matrix(self.graph)>0)).toarray()
    
    def build(self, input_shape): 
        self.w = self.add_weight(
            name = 'w', 
            shape = (input_shape[1], self.output_dim), 
            initializer = 'normal', trainable = True
        ) 
        self._adj = tf.constant(self.preprocess_graph(self.adj), shape=self.adj.shape, dtype=np.float32)
        super(GraphConvolution, self).build(input_shape)

    def call(self, input_data): 
        x = K.dot(input_data, self.w)
        x = K.dot(self._adj, x) 
        return self.activation(x)
    
    def compute_output_shape(self, input_shape): 
        return (input_shape[0], self.output_dim)

class InnerProductDecoder(Layer):
    """Decoder model layer for link prediction."""

    def __init__(self, **kwargs):    
        self.activation = tf.nn.sigmoid
        super(InnerProductDecoder, self).__init__(**kwargs)
        
    def call(self, inputs):
        x = tf.transpose(inputs)
        x = tf.matmul(inputs, x)
        # x = tf.reshape(x, [-1])
        return self.activation(x)



In [16]:
from keras import layers

In [17]:
from keras import Input, Model

n = len(G.nodes())

input_img = Input(shape=(n), batch_size=n) 

hidden = GraphConvolution(10, G, activation=tf.nn.relu)(input_img)
embedding = GraphConvolution(2, G, activation=tf.nn.relu)(hidden)

reconstructed = InnerProductDecoder()(embedding)

In [18]:
encoder = Model(input_img, embedding)

model = Model(input_img, reconstructed)

In [19]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(1660, 1660)]            0         
                                                                 
 graph_convolution (GraphCo  (1660, 10)                16600     
 nvolution)                                                      
                                                                 
 graph_convolution_1 (Graph  (1660, 2)                 20        
 Convolution)                                                    
                                                                 
 inner_product_decoder (Inn  (1660, 1660)              0         
 erProductDecoder)                                               
                                                                 
Total params: 16620 (64.92 KB)
Trainable params: 16620 (64.92 KB)
Non-trainable params: 0 (0.00 Byte)
_______________________

In [20]:
model.compile(optimizer='adam', loss='binary_crossentropy')

In [21]:
x_train = np.eye(n)

In [23]:
y_train = adj

In [24]:
for _ in range(20):
    model.fit(x_train, y_train, batch_size=n)



In [25]:
output = encoder(x_train)

In [26]:
output

<tf.Tensor: shape=(1660, 2), dtype=float32, numpy=
array([[0.        , 0.        ],
       [0.        , 0.        ],
       [0.        , 0.        ],
       ...,
       [0.00156295, 0.        ],
       [0.        , 0.        ],
       [0.00103919, 0.        ]], dtype=float32)>

In [82]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 graph_convolution_9 (Graph  (34, 10)                  340       
 Convolution)                                                    
                                                                 
 graph_convolution_10 (Grap  (34, 4)                   40        
 hConvolution)                                                   
                                                                 
Total params: 380 (1.48 KB)
Trainable params: 380 (1.48 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [45]:
len(G.nodes)

34

In [None]:
    
    def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu):
        self.dropout = dropout
        self.adj = adj
        self.act = act

    def _call(self, inputs):
        x = inputs
        x = tf.nn.dropout(x, 1-self.dropout)
        x = tf.matmul(x, self.vars['weights'])
        x = tf.sparse_tensor_dense_matmul(self.adj, x)
        outputs = self.act(x)
        return outputs