In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import torch
import scipy.sparse as sp
import torch.nn as nn
from node2vec import Node2Vec
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
import itertools
import dgl

In [36]:
from dgl.nn import SAGEConv
from dgl.nn import GraphConv
import dgl.function as fn

# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, 'mean')
        self.conv2 = SAGEConv(h_feats, h_feats, 'mean')

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

class GraphGCN(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphGCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats, allow_zero_in_degree=True)
        self.conv2 = GraphConv(h_feats, h_feats, allow_zero_in_degree=True)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            # Compute a new edge feature named 'score' by a dot-product between the
            # source node feature 'h' and destination node feature 'h'.
            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
            return g.edata['score'][:, 0]

In [37]:
G_5day = nx.read_gexf("../graph_creation/2020-07-01__to__2020-07-05__15-overlap__hashmap.gexf")
G_6day = nx.read_gexf("../graph_creation/2020-07-01__to__2020-07-06__15-overlap__hashmap.gexf")

In [128]:
sorted(G_5day.nodes())

['000433eca9a8548de1efa8902b3b3b5d0ee9fa2acf653a265cc086aaaff08c94',
 '000b653fc77693dbe2b671e2deee055c7b4d2fda64e30699a24c39d3b0010847',
 '001934dbfcac5f7e59a52fa80566b61459171d8e21582cff59c834e913a0be0a',
 '001eceee3f4309be31b174ac3d6c6f6c1479c2a6f32cbf7e832ab77cea5a2d88',
 '00204200d3e08cb4e621c90fb4dda9c2b97342cb5ca0ec0cbe1643cff71d45f9',
 '0025a62b4e167aa5a8683ad08d6663087c00d731d8a35cb394e3ae958c5da341',
 '00273d0fa0e463f97c4195ccfa48a79ab8738d7d8b40004283321d4efe7a0f22',
 '0028a7fe632179123256fa13be33d4db14b31bf88634cd0989388636893ea9f9',
 '0029a0a56f7ae2119a137712a75c130dc3fe48e9ba60de281e7a96d86a6af770',
 '002a0f0ff8059f0f300c0c5a81516a370d2a48bc2ceba13ed5400554667f1134',
 '002a3ed02017ed512ac7d4d0616704ccf248d16052d6a3825468f6017cdff98c',
 '002c0d00e2b0a210e38748b296937a7784c2b05c4280414222a6e997766fb7a5',
 '00313e28d73e447e0f6814e1fde4aa312bb2c9e5a53adba701b48b11d172eede',
 '003242b714a532787f430f6b2f61cc8e920d229116623eb0daf8e280e7aeb8db',
 '003861e026d5298266b8c5eeff1ed18b

In [130]:
node_mapping = {i: x for i, x in enumerate(sorted(G_5day.nodes()))}

In [131]:
node_mapping[0]

'000433eca9a8548de1efa8902b3b3b5d0ee9fa2acf653a265cc086aaaff08c94'

In [132]:
count = 0
for edge in G_5day.edges():
    if (edge[0] == node_mapping[1]) or (edge[1] == node_mapping[1]):
        count += 1
print(count)

84


In [133]:
dgl_G_5day = dgl.from_networkx(G_5day)
dgl_G_6day = dgl.from_networkx(G_6day)

In [134]:
day5_edges = dgl_G_5day.edges()
day6_edges = dgl_G_6day.edges()
count_2 = 0
for i in range(len(day5_edges[0])):
    if (day5_edges[0][i] == 0) or (day5_edges[1][i] == 0):
        count_2 += 1
print(count_2)

168


In [137]:
len(day6_edges[0])

2003742

In [138]:
len(G_6day.edges())

1001871