In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
import tensorflow as tf

2023-08-17 16:57:40.461926: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
import torch
from torch_geometric.data import Data

  from .autonotebook import tqdm as notebook_tqdm


# Data Creation

```
1 - 2          1     
|   |         /  \       1 - 2
3 - 4        2 -- 3   
```

These are the types of graphs built for our GNN.
Each node will contain a pandas data frame where the features are:

```
a b c
- - - 
G C ? 
```
Here G means the value is given and C means it is calculated. Feature b is calculated by summing feature a over the node's neighbors. Feature c is calculated as (a + b) within the same row.

Here are the adjacency matrices (respectively):
$$
\begin{bmatrix}
0 & 1 & 1 & 0\\
1 & 0 & 0 & 1\\
1 & 0 & 0 & 1\\
0 & 1 & 1 & 0
\end{bmatrix}
$$

$$
\begin{bmatrix}
0 & 1 & 1\\
1 & 0 & 1\\
1 & 1 & 0\\
\end{bmatrix}
$$

$$
\begin{bmatrix}
0 & 1 \\
1 & 0
\end{bmatrix}
$$

In [None]:
# Adjacency matrices of the three example graphs (row/column i <-> node i+1).
# 4-node square: edges 1-2, 1-3, 2-4, 3-4.
G1A = np.array([
    [0, 1, 1, 0],
    [1, 0, 0, 1],
    [1, 0, 0, 1],
    [0, 1, 1, 0],
])
# 3-node triangle: every node is connected to the other two.
G2A = np.array([
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 0],
])
# 2-node graph: a single edge.
G3A = np.array([
    [0, 1],
    [1, 0],
])

For the respective graphs:
```
1:     2:     3:     4:
a b c  a b c  a b c  a b c
- - -  - - -  - - -  - - -
2 5 7  3 3 6  2 3 5  1 5 6

1:      2:      3:     
a b c   a b c   a b c  
- - -   - - -   - - - 
4 7 11  5 6 11  2 9 11 

1:     2:   
a b c  a b c 
- - -  - - - 
2 1 3  1 2 3  
```


In [None]:
# One single-row record per node, named G<graph>_<node>.
# Columns: a, b, c (one-element lists so each record becomes a 1-row DataFrame).

# Graph 1 (4 nodes)
G1_1 = dict(a=[2], b=[5], c=[7])
G1_2 = dict(a=[3], b=[3], c=[6])
G1_3 = dict(a=[2], b=[3], c=[5])
G1_4 = dict(a=[1], b=[5], c=[6])

# Graph 2 (3 nodes)
G2_1 = dict(a=[4], b=[7], c=[11])
G2_2 = dict(a=[5], b=[6], c=[11])
G2_3 = dict(a=[2], b=[9], c=[11])

# Graph 3 (2 nodes)
G3_1 = dict(a=[2], b=[1], c=[3])
G3_2 = dict(a=[1], b=[2], c=[3])

# Flat, graph-major ordering: indices 0-3 -> graph 1, 4-6 -> graph 2, 7-8 -> graph 3.
data = [G1_1, G1_2, G1_3, G1_4, G2_1, G2_2, G2_3, G3_1, G3_2]

# One DataFrame per node, in the same order as `data`.
dfs = [pd.DataFrame(node_record) for node_record in data]

# Show one node's frame as the cell output.
dfs[1]

# Building the dataset [Normal]

In [None]:
def normalize_adjacency(adjacency_matrix):
    """Return the symmetrically normalized adjacency ``D^-1/2 · A · D^-1/2``.

    ``D`` is the diagonal degree matrix built from the row sums of
    ``adjacency_matrix``.

    Args:
        adjacency_matrix: Square 2-D array-like adjacency matrix.

    Returns:
        A NumPy array of the same shape holding the normalized adjacency.
        Rows/columns of isolated nodes (degree 0) are all zeros rather than
        NaN.
    """
    degrees = np.sum(adjacency_matrix, axis=1)

    # A zero degree gives 0 ** -0.5 == inf, and inf * 0 later becomes NaN for
    # the whole row/column. Silence the expected warning and treat the
    # scaling factor of isolated nodes as 0 instead.
    with np.errstate(divide="ignore"):
        inv_sqrt_degrees = np.power(degrees, -0.5)
    inv_sqrt_degrees[np.isinf(inv_sqrt_degrees)] = 0.0

    D_inv_sqrt = np.diag(inv_sqrt_degrees)
    normalized_adjacency = np.dot(np.dot(D_inv_sqrt, adjacency_matrix), D_inv_sqrt)
    return normalized_adjacency

In [None]:
# 1000 float32 copies of each graph's normalized adjacency matrix.
# Each list must be built from its own graph's adjacency (G1A / G2A / G3A) so
# that the matrix size matches the number of node-feature rows of that graph.
G1A_list = [np.float32(normalize_adjacency(G1A)) for _ in range(1000)]
G2A_list = [np.float32(normalize_adjacency(G2A)) for _ in range(1000)]
G3A_list = [np.float32(normalize_adjacency(G3A)) for _ in range(1000)]

In [None]:
# All adjacency samples in graph-major order: graph 1, then graph 2, then graph 3.
A_list = [*G1A_list, *G2A_list, *G3A_list]

In [None]:
# Sanity check: three lists of 1000 matrices were concatenated, so expect 3000.
print(len(A_list))
# (Printing A_list itself is deliberately avoided: it would dump every matrix.)

In [None]:
# Node-feature matrix per graph: one row per node, every column except the
# target column "b", cast to float32. `dfs` is graph-major, so graph 1 owns
# indices 0-3, graph 2 owns 4-6 and graph 3 owns 7-8.
G1_F = np.concatenate(
    [np.float32(dfs[node].drop(columns=["b"]).to_numpy()) for node in range(0, 4)]
)

G2_F = np.concatenate(
    [np.float32(dfs[node].drop(columns=["b"]).to_numpy()) for node in range(4, 7)]
)

G3_F = np.concatenate(
    [np.float32(dfs[node].drop(columns=["b"]).to_numpy()) for node in range(7, 9)]
)

In [None]:
# Repeat each graph's feature matrix 1000 times. Every entry refers to the
# same underlying array object (no copies are made).
G1_F_list = [G1_F] * 1000
G2_F_list = [G2_F] * 1000
G3_F_list = [G3_F] * 1000

In [None]:
# All feature samples in graph-major order: graph 1, then graph 2, then graph 3.
X_list = [*G1_F_list, *G2_F_list, *G3_F_list]

In [None]:
# Sanity check: three lists of 1000 feature matrices were concatenated, so expect 3000.
print(len(X_list))
# (Printing X_list itself is deliberately avoided: it would dump every matrix.)

In [None]:
# Target matrix per graph: one row per node holding what is left after the
# "a" and "c" columns are dropped. `dfs` is graph-major, so graph 1 owns
# indices 0-3, graph 2 owns 4-6 and graph 3 owns 7-8.
G1_y = np.concatenate(
    [dfs[node].drop(columns=["a", "c"]).to_numpy() for node in range(0, 4)]
)

G2_y = np.concatenate(
    [dfs[node].drop(columns=["a", "c"]).to_numpy() for node in range(4, 7)]
)

G3_y = np.concatenate(
    [dfs[node].drop(columns=["a", "c"]).to_numpy() for node in range(7, 9)]
)

In [None]:
# 1000 independent float32 copies of each graph's target matrix.
G1_y_list = [G1_y.astype(np.float32) for _ in range(1000)]
G2_y_list = [G2_y.astype(np.float32) for _ in range(1000)]
G3_y_list = [G3_y.astype(np.float32) for _ in range(1000)]

In [None]:
# All target samples in graph-major order: graph 1, then graph 2, then graph 3.
y_list = [*G1_y_list, *G2_y_list, *G3_y_list]

In [None]:
# Sanity check: three lists of 1000 target matrices were concatenated, so expect 3000.
print(len(y_list))
# (Printing y_list itself is deliberately avoided: it would dump every matrix.)

# Test Train Split

In [None]:
# Split adjacency, feature and target lists with one call so that sample i of
# each list ends up on the same side of the split. 20% is held out for
# testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(A_list, X_list, y_list, test_size=0.2, random_state=42)
A_train, A_test, X_train, X_test, y_train, y_test = split_parts

# Building Dataset [Torch Geometric]

# Graph Neural Network [TensorFlow / Keras]

In [None]:
def graph_convolution_layer(adjacency, input_features, output_dim, activation, kernel=None):
    """Apply one graph-convolution step: ``activation(A @ X @ W)``.

    Args:
        adjacency: Adjacency tensor, ``(N, N)`` or batched ``(B, N, N)``.
        input_features: Node-feature tensor, ``(N, F)`` or batched ``(B, N, F)``.
        output_dim: Number of output features per node.
        activation: Callable applied to the transformed features.
        kernel: Optional ``(F, output_dim)`` weight tensor/variable. When
            omitted, a new randomly initialised ``tf.Variable`` is created on
            every call; it is neither reused nor tracked anywhere, so pass a
            kernel explicitly if the weights have to persist or be trained.

    Returns:
        The activated tensor with ``output_dim`` features per node.
    """
    if kernel is None:
        # Size the weight from the LAST axis (the feature axis); index 1 would
        # be the node axis for batched (B, N, F) input.
        kernel = tf.Variable(
            tf.random.normal(shape=(input_features.shape[-1], output_dim))
        )

    # Aggregate neighbour features first, then project them.
    aggregated_features = tf.matmul(adjacency, input_features)
    transformed_features = tf.matmul(aggregated_features, kernel)
    return activation(transformed_features)

In [None]:
class GraphConvolutionLayer(tf.keras.layers.Layer):
    """Trainable graph-convolution layer computing ``activation(A @ X @ W)``.

    Called on a two-element list ``[adjacency, features]`` with shapes
    ``(B, N, N)`` and ``(B, N, F)``; returns a ``(B, N, units)`` tensor.

    Args:
        units: Number of output features per node.
        activation: Any identifier accepted by ``tf.keras.activations.get``.
    """

    def __init__(self, units, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # input_shape is [adjacency_shape, features_shape]; the kernel maps the
        # last (feature) axis of the node features to `units` outputs.
        feature_dim = int(input_shape[1][-1])
        self.kernel = self.add_weight(
            name="kernel",
            shape=(feature_dim, self.units),
            initializer="glorot_uniform",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        adjacency, features = inputs
        # Aggregate neighbour features, then apply the shared projection.
        aggregated = tf.matmul(adjacency, features)
        return self.activation(tf.matmul(aggregated, self.kernel))


def graph_convolution_model(num_node_features=2):
    """Build and compile the node-level regression GNN.

    Architecture: graph convolution (16 units) -> Dense(32, relu) ->
    Dense(1, linear), so each node predicts one value.

    Args:
        num_node_features: Number of input features per node. Defaults to 2
            because the dataset built in this notebook keeps two feature
            columns ("a" and "c") per node.

    Returns:
        A compiled ``tf.keras.Model`` taking ``[adjacency, features]`` with
        shapes ``(B, N, N)`` and ``(B, N, num_node_features)`` and producing
        ``(B, N, 1)``. It is compiled with Adam and mean-squared-error loss.
    """
    # The node count N is left as None so graphs of any size are accepted.
    input_adjacency = tf.keras.Input(shape=(None, None), dtype=tf.float32, name="adjacency")
    input_features = tf.keras.Input(
        shape=(None, num_node_features), dtype=tf.float32, name="features"
    )

    gnn_layer = GraphConvolutionLayer(units=16)
    node_embeddings = gnn_layer([input_adjacency, input_features])
    hidden_layer = tf.keras.layers.Dense(32, activation="relu")(node_embeddings)
    output_layer = tf.keras.layers.Dense(1, activation="linear")  # Each node predicts one feature

    output = output_layer(hidden_layer)

    # `Model` is not imported by name in this notebook; use the qualified form.
    gnn_model = tf.keras.Model(inputs=[input_adjacency, input_features], outputs=output)
    gnn_model.compile(optimizer="adam", loss="mean_squared_error")

    return gnn_model

In [None]:
# Training configuration: number of passes over the training set and the
# number of graphs fed to the model per optimisation step.
num_epochs, batch_size = 1, 1

# Build and compile a fresh model instance.
gnn_model = graph_convolution_model()

In [None]:
# Train for `num_epochs` passes over the training graphs, then report the
# average loss over the held-out graphs after every epoch.
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # Mini-batch training: visit the samples in a fresh random order each epoch.
    indices = np.arange(len(X_train))
    np.random.shuffle(indices)

    for start in range(0, len(indices), batch_size):
        batch_indices = indices[start : start + batch_size]
        # Keras expects dense arrays with a leading batch axis, not Python
        # lists of arrays. np.stack requires every graph in a batch to have
        # the same number of nodes, which always holds for batch_size == 1.
        batch_A = np.stack([A_train[i] for i in batch_indices])
        batch_X = np.stack([X_train[i] for i in batch_indices])
        batch_y = np.stack([y_train[i] for i in batch_indices])

        loss = gnn_model.train_on_batch([batch_A, batch_X], batch_y)
        print(f"  Batch Loss: {loss}")

    # Evaluate on test set. The test graphs can differ in node count, so they
    # cannot be stacked into one array; score each graph on its own and
    # average the per-graph losses.
    # NOTE(review): assumes test_on_batch returns a scalar loss (no extra
    # metrics compiled into the model) — confirm against the model definition.
    per_graph_losses = [
        gnn_model.test_on_batch(
            [adj[np.newaxis, ...], feats[np.newaxis, ...]], target[np.newaxis, ...]
        )
        for adj, feats, target in zip(A_test, X_test, y_test)
    ]
    test_loss = float(np.mean(per_graph_losses))
    print(f"  Test Loss: {test_loss}")