In [1]:
import numpy as np
import pandas as pd
import keras
from keras import Model, Input
import tensorflow as tf
from sklearn.metrics import precision_score, recall_score

In [2]:
# Graph Edges as Frame
raw_edges = pd.read_csv('data/edges.csv')
raw_edges.columns = ['u', 'v']

# Number of Nodes. Got 41 772.
N = max(raw_edges['u'].max(), raw_edges['v'].max()) + 1

# Number of Edges. Got 125 826.
M = len(raw_edges)

# Sample
raw_edges = raw_edges.sample(M)

In [3]:
# Train / Test Edges
r = int(.8 * M)
train_positive_edges = raw_edges[:r]
test_positive_edges = raw_edges[r:]

# Logs
print(len(train_positive_edges), len(test_positive_edges))

100660 25166


In [4]:
class ReduceSumLayer(keras.layers.Layer):
    def call(self, x):
        return tf.reduce_sum(x, axis=-1)

In [5]:
# Input U. Got [None, 1].
input_u = Input(shape=(1,))

# Input V. Got [None,1].
input_v = Input(shape=(1,))

# Embeddings matrix. Got [N,k].
k = 4
z = keras.layers.Embedding(input_dim=N, output_dim=k)

# Embedded u. Got [None, 1, k] -> [None, k]
zu = z(input_u)
zu = keras.layers.Reshape(target_shape=(k,))(zu)

# Embedded v. Got [None, 1, k] -> [None, k]
zv = z(input_v)
zv = keras.layers.Reshape(target_shape=(k,))(zv)

# Multiply. Got [None, k].
z_uv = keras.layers.Multiply()([zu, zv])

# Reduce Sum. Got [None, 1]
s_uv = ReduceSumLayer()(z_uv)

# Sigmoid. Got [None, 1]
y_uv = keras.activations.sigmoid(s_uv)

# Model
m = Model(inputs=[input_u, input_v], outputs=y_uv)



In [6]:
# Compile
opt = keras.optimizers.Adam(learning_rate=1e-3)
loss = keras.losses.BinaryCrossentropy()
m.compile(optimizer=opt, loss=loss)

In [7]:
# Batches. Got [M,1]
batch_train_u = np.matrix(train_positive_edges['u']).T
batch_test_u = np.matrix(test_positive_edges['u']).T

# Batches. Got [M,1]
batch_train_v = np.matrix(train_positive_edges['v']).T
batch_test_v = np.matrix(test_positive_edges['v']).T

# Ground truth. Got [M,1].
y_true_train = np.ones((len(train_positive_edges), 1))
y_true_test = np.ones((len(test_positive_edges), 1))

# Logs
print("Train", batch_train_u.shape, batch_train_v.shape, y_true_train.shape)
print("Test", batch_test_u.shape, batch_test_v.shape, y_true_train.shape)

Train (100660, 1) (100660, 1) (100660, 1)
Test (25166, 1) (25166, 1) (100660, 1)


In [8]:
# Fit
m.fit(x=[batch_train_u, batch_train_v], y=y_true_train, batch_size=163072, epochs=512)

Epoch 1/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 566ms/step - loss: 0.6931
Epoch 2/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - loss: 0.6931
Epoch 3/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - loss: 0.6931
Epoch 4/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 0.6931
Epoch 5/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.6931
Epoch 6/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.6931
Epoch 7/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 0.6931
Epoch 8/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.6931
Epoch 9/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.6931
Epoch 10/512
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.6931
Epoch 11

<keras.src.callbacks.history.History at 0x1eeec08a6d0>

In [9]:
# Inference
y_pred_train = m.predict([batch_train_u, batch_train_v], batch_size=163072)
y_pred_test = m.predict([batch_test_u, batch_test_v], batch_size=163072)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step


In [10]:
# Precision
print(precision_score(y_true=y_true_train, y_pred=y_pred_train.round()))
# Recall
print(recall_score(y_true=y_true_train, y_pred=y_pred_train.round()))
# Truth
pd.Series(y_pred_train.round()).value_counts()

1.0
0.9845817603814823


1.0    99108
0.0     1552
Name: count, dtype: int64

In [11]:
# Precision
print(precision_score(y_true=y_true_test, y_pred=y_pred_test.round()))
# Recall
print(recall_score(y_true=y_true_test, y_pred=y_pred_test.round()))
# Truth
pd.Series(y_pred_test.round()).value_counts()

1.0
0.7485893666057379


1.0    18839
0.0     6327
Name: count, dtype: int64