# Graph Neural Network Using Neo4J & PyTorch

### Environment

In [146]:
import pandas as pd

import matplotlib 
import matplotlib.pyplot as plt

# Use the "fivethirtyeight" matplotlib style for the figures in this notebook.
plt.style.use('fivethirtyeight')
# Render pandas floats with three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

import pandas as pd
import numpy as np
from collections import Counter
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
from py2neo import Graph

In [15]:
import os

# Connection settings are read from the environment so credentials do not have
# to be edited into the notebook. The fallbacks are the values this notebook
# was originally run with, so the cell still works when nothing is exported.
# NOTE(review): the fallback password is still stored in the notebook — rotate
# it and drop the defaults once NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD are set.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://54.87.236.196:37389"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "throttle-retailers-jobs"),
    ),
)

#### EDA

In [46]:
# Count the Person nodes. numNodes is reused further down as the upper bound
# of the node-id range (ids run from 1 to numNodes).
query = """
MATCH (p:Person) RETURN count(*) as numNodes
"""
numNodes = graph.run(query).to_data_frame().iloc[0]['numNodes']
numNodes

34

In [6]:
# Check whether the graph contains self-loops: match any relationship that
# connects a Person node to itself and list the ids of those nodes.
query = """
MATCH (p:Person)-[]-(p)
RETURN p.id
"""
graph.run(query).to_data_frame()

In [10]:
# Collect the ids of all Person nodes in the graph into a single list
query = """
MATCH (p:Person)
RETURN collect(p.id) as allIds
"""
graph.run(query).to_data_frame().iloc[0]['allIds']

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34]

## Graph Operations and Functions

Setting features of all the nodes using a dictionary

In [216]:
# aux function 
def string_to_array(word):
    """Convert every element of ``word`` to ``float`` and return them as a list.

    Iterating a string yields single characters, so ``"101"`` becomes
    ``[1.0, 0.0, 1.0]``.
    """
    return list(map(float, word))

def array_to_string(array):
    """Concatenate ``str(array[i])`` for ``i`` in ``0 .. array.size - 1``.

    No separator is inserted between the elements; an array of size 0
    gives the empty string.
    """
    return "".join(str(array[position]) for position in range(array.size))

def tensor_to_string(tensor):
    """Serialise a tensor as comma-separated text.

    The tensor is converted with ``tolist()`` and every item of that list is
    rendered with ``str``. An empty tensor gives ``""``. The output can be
    parsed back by ``string_to_tensor`` when the items are plain numbers,
    i.e. for a 1-D tensor.
    """
    return ",".join(str(item) for item in tensor.tolist())

def string_to_tensor(string_input):
    """Parse comma-separated numbers into a 1-D float tensor.

    Every comma-delimited token is converted with ``float``; a token that
    is not a valid number raises ``ValueError``.
    """
    values = [float(token) for token in string_input.split(",")]
    return torch.tensor(values)

In [387]:
def set_features(graph, features):
    """Store one feature row per Person node in the database.

    Row ``i`` of ``features`` is serialised with ``tensor_to_string`` and
    written to the ``features`` property of the Person whose ``id`` is
    ``i + 1``. One query is issued per row.
    """
    query = """
        MATCH (p:Person {id: $id_node}) SET p.features = $features
        """
    for row_index in range(features.shape[0]):
        params = {'id_node': row_index + 1, 'features': tensor_to_string(features[row_index])}
        graph.run(query, params)


In [389]:
# Start from one-hot features: row i of the identity matrix is stored on the
# node with id i + 1. The size follows the node count queried from the graph
# (numNodes) instead of a hard-coded 34, so the cell adapts to the data.
features = torch.eye(int(numNodes))
set_features(graph, features)

Either add self-loops to the graph, or explicitly include each node's own features when updating its features.

Current approach:
- Self-loops present in the graph deleted
- When updating features, take into account own features

Future approach: 
- Add self-loops to the graph
- When updating features, only use neighbor features

In [435]:
def get_features(graph, id_node):
    """Return the ``features`` property stored on a single Person node.

    Runs a query for the Person whose ``id`` equals ``id_node`` and returns
    the ``own_features`` value from the first row of the result.
    """
    query_own_features = """
    MATCH (p:Person {id: $id_node}) RETURN p.features as own_features
    """
    result_frame = graph.run(query_own_features, {"id_node": id_node}).to_data_frame()
    first_row = result_frame.iloc[0]
    return first_row['own_features']

def get_features_tensor(graph, id_node):
    """Fetch a node's stored feature string and parse it into a tensor."""
    stored_text = get_features(graph, id_node)
    return string_to_tensor(stored_text)

def get_neighbors_features(graph, id_node):
    """Return the stored ``features`` values of a node's KNOWS-neighbours.

    The query matches every Person connected to the Person with id
    ``id_node`` through a ``KNOWS`` relationship (either direction) and
    collects their ``features`` properties into one list, which is read
    from the first row of the result.
    """
    query_neighbors_features = """
    MATCH (p:Person {id: $id_node})-[:KNOWS]-(neighbor:Person)
    RETURN collect(neighbor.features) as neighbors_features
    """
    result_frame = graph.run(query_neighbors_features, {"id_node": id_node}).to_data_frame()
    first_row = result_frame.iloc[0]
    return first_row['neighbors_features']
def get_neighbors_features_tensor(graph, id_node):
    """Return the neighbours' stored feature strings parsed into a list of tensors."""
    return [string_to_tensor(text) for text in get_neighbors_features(graph, id_node)]

def get_all_features(graph):
    """Read own and neighbour features for every node id from 1 to ``numNodes``.

    Returns:
        A pair ``(own, neighbours)``. ``own`` is a list ordered by node id,
        so entry 0 belongs to node 1. ``neighbours`` is a dict mapping each
        node id to the list returned by ``get_neighbors_features``.
    """
    own_by_position = []
    neighbors_by_id = {}
    for node_id in range(1, numNodes + 1):
        own_by_position.append(get_features(graph, node_id))
        neighbors_by_id[node_id] = get_neighbors_features(graph, node_id)
    return own_by_position, neighbors_by_id

def update_features(graph, id_node, all_features, all_neighbor_features):
    """Aggregate one node's neighbourhood and write the result back to the graph.

    The new feature vector is the node's own features plus the element-wise
    sum of its neighbours' features. It is serialised with
    ``tensor_to_string`` and stored in the node's ``features`` property.

    Args:
        graph: Object whose ``run(query, params)`` executes a Cypher query.
        id_node: Id of the Person node to update (ids start at 1).
        all_features: Own feature strings of all nodes, either a dict keyed
            by node id or a sequence ordered by node id (entry 0 belongs to
            node 1), as returned by ``get_all_features``.
        all_neighbor_features: Dict mapping node id to the list of its
            neighbours' feature strings.
    """
    if isinstance(all_features, dict):
        own_text = all_features[id_node]
    else:
        # Sequences are 0-indexed while node ids start at 1.
        own_text = all_features[id_node - 1]
    own_features = string_to_tensor(own_text)

    new_features = own_features
    neighbors_features = [string_to_tensor(text) for text in all_neighbor_features[id_node]]
    if neighbors_features:
        # Add (rather than stack) so the result stays 1-D: tensor_to_string on
        # a 2-row tensor produces text that string_to_tensor cannot parse.
        # torch.stack also rejects an empty list, hence the guard above.
        new_features = own_features + torch.stack(neighbors_features, dim=0).sum(dim=0)

    query_update_features = """
    MATCH (p:Person {id: $id_node}) SET p.features = $new_features
    """
    graph.run(query_update_features, {'id_node': id_node, 'new_features': tensor_to_string(new_features)})
    
def update_all_features(graph, all_features, all_neighbor_features):
    """Call ``update_features`` once for every node id from 1 to ``numNodes``, in ascending order."""
    first_id, last_id = 1, numNodes
    for node_id in range(first_id, last_id + 1):
        update_features(graph, node_id, all_features, all_neighbor_features)

In [433]:
# Sanity check: read node 1's stored feature string back as a tensor.
get_features_tensor(graph, 1)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1.,
        0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.])

In [311]:
# Inspect the raw result for node 34 as a DataFrame. This is the same query
# that get_features runs, without extracting the single value.
query_own_features = """
    MATCH (p:Person {id: $id_node}) RETURN p.features as own_features
    """  
own_features = graph.run(query_own_features, {"id_node": 34}).to_data_frame()
own_features

Unnamed: 0,own_features
0,


## Neural Network Creation

In [122]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [410]:
class GCNLayer(nn.Module):
    """Graph-convolution layer whose neighbourhood aggregation runs through the graph database.

    ``forward`` writes the input features to the graph, lets
    ``update_all_features`` aggregate every node's neighbourhood inside the
    database, reads the aggregated features back and applies a linear map.

    NOTE(review): features make a round trip through the database as strings
    (``tensor_to_string`` / ``string_to_tensor``), so the tensor fed to
    ``self.linear`` is rebuilt from text and carries no autograd history from
    ``input_features``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer's linear map from ``input_size`` to ``output_size`` features."""
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    @staticmethod
    def _as_matrix(features):
        """Stack per-node features into one tensor with one row per node.

        Accepts a tensor (returned unchanged), a dict (its values are used in
        iteration order) or any other iterable of rows. Each row may be a
        tensor or a comma-separated feature string.
        """
        if isinstance(features, torch.Tensor):
            return features
        rows = features.values() if isinstance(features, dict) else features
        return torch.stack(
            [row if isinstance(row, torch.Tensor) else string_to_tensor(row) for row in rows],
            dim=0,
        )

    def forward(self, graph, input_features):
        """Aggregate neighbourhoods in the graph, then apply the linear map.

        Args:
            graph: Graph handle passed through to the feature helpers.
            input_features: Per-node features in any form ``_as_matrix``
                accepts; row ``i`` is written to the node with id ``i + 1``.

        Returns:
            The output of ``self.linear`` on the aggregated features, one row
            per node.
        """
        set_features(graph, self._as_matrix(input_features))
        all_features, all_neighbor_features = get_all_features(graph)
        update_all_features(graph, all_features, all_neighbor_features)
        all_features_updated, _ = get_all_features(graph)
        # get_all_features returns the own features as a list, which has no
        # .values(); _as_matrix handles both list and dict results.
        return self.linear(self._as_matrix(all_features_updated))


In [425]:
class GCN(nn.Module):
    """Two ``GCNLayer`` blocks with a ReLU between them."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the two layers: input -> hidden and hidden -> output."""
        super().__init__()
        self.gcnlayer1 = GCNLayer(input_size, hidden_size)
        self.gcnlayer2 = GCNLayer(hidden_size, output_size)

    def forward(self, graph, input_features):
        """Run both layers on ``graph`` and return the second layer's output.

        Args:
            graph: Graph handle passed through to both layers.
            input_features: Input of the first layer, forwarded unchanged.
        """
        hidden = F.relu(self.gcnlayer1(graph, input_features))
        # GCNLayer.forward accepts a mapping of node id -> serialised feature
        # string, so the hidden activations are re-keyed by node id (row i
        # belongs to node i + 1) and serialised before the second layer.
        # Previously this mapping was built but the raw tensor was passed.
        input_next_layer = {
            row_index + 1: tensor_to_string(hidden[row_index])
            for row_index in range(hidden.shape[0])
        }
        return self.gcnlayer2(graph, input_next_layer)

In [411]:
# Build a single layer mapping 34 input features to 5 output features and show its repr.
layer1 = GCNLayer(34, 5)
layer1

GCNLayer(
  (linear): Linear(in_features=34, out_features=5, bias=True)
)

In [413]:
# Run the layer once against the graph.
# NOTE(review): `all_features` is not assigned at notebook level in any visible
# cell — presumably it came from an earlier get_all_features(graph) call;
# confirm before re-running from a fresh kernel.
out1 = layer1(graph, all_features)

In [427]:
# Re-key the layer output by node id: row i of out1 is stored under key i + 1.
input_next_layer = {}
for i in range(numNodes):
    input_next_layer[i+1] = out1[i]

In [428]:
# Display the dictionary built in the previous cell.
input_next_layer

{1: tensor([ 0.5688,  0.3806, -0.7450,  0.3941,  0.3794], grad_fn=<SelectBackward>),
 2: tensor([ 0.1034, -0.3567, -0.5000,  0.1800,  0.1980], grad_fn=<SelectBackward>),
 3: tensor([ 0.3599,  0.0732, -0.7398,  0.5219,  0.6678], grad_fn=<SelectBackward>),
 4: tensor([ 0.0635,  0.0198, -0.4088,  0.3009,  0.2246], grad_fn=<SelectBackward>),
 5: tensor([ 0.3006,  0.2933, -0.1029, -0.0574,  0.2077], grad_fn=<SelectBackward>),
 6: tensor([ 0.3243,  0.3725, -0.0043, -0.3148,  0.0552], grad_fn=<SelectBackward>),
 7: tensor([ 0.2925,  0.4382, -0.0341, -0.3293, -0.0075], grad_fn=<SelectBackward>),
 8: tensor([ 0.0410, -0.2000, -0.4015,  0.2048,  0.2526], grad_fn=<SelectBackward>),
 9: tensor([ 0.3292, -0.4898, -0.2874,  0.1172,  0.1901], grad_fn=<SelectBackward>),
 10: tensor([ 0.2256, -0.3907, -0.0774,  0.2988,  0.3176], grad_fn=<SelectBackward>),
 11: tensor([ 0.1648,  0.3039, -0.2342, -0.0197,  0.0704], grad_fn=<SelectBackward>),
 12: tensor([ 0.0218, -0.0610, -0.0105, -0.1135, -0.0062], grad

In [376]:
# Manually overwrite node 1's stored features with the first row of `tes2_linear`.
# NOTE(review): `tes2_linear` is not defined in any visible cell — confirm
# where it comes from before re-running this cell.
query = """
MATCH (p:Person {id: 1}) SET p.features = $features
"""
graph.run(query, {'features': tensor_to_string(tes2_linear[0])})

<py2neo.database.Cursor at 0x7fb6b23c8850>

### Training

Setting labels for nodes 1 and 34 (semi-supervised problem)

In [118]:
# Label the two known nodes: the Person with id 1 gets label 0 and the Person
# with id 34 gets label 1. All other nodes are left without a label here.
# (Going by the variable names, node 1 is the instructor and node 34 the
# president — presumably of the karate-club dataset; confirm.)
query_instructor = """
MATCH (p:Person) WHERE p.id = 1 SET p.label = 0 
"""
query_president = """
MATCH (p:Person) WHERE p.id = 34 SET p.label = 1 
"""
graph.run(query_instructor)
graph.run(query_president)

<py2neo.database.Cursor at 0x7fb6b3719890>

In [119]:
# Optimizer: Adam over all parameters of `net`, with learning rate 0.01
# NOTE(review): `net` is not created in any visible cell — presumably a GCN
# instance; confirm before re-running from a fresh kernel.
optimizer = torch.optim.Adam(net.parameters(), lr = 0.01)

In [None]:
all_logits = []
num_epochs = 30
for epoch in range(num_epochs):
    logits = net(G, inputs)