In [62]:
import torch
from dgl.nn.pytorch import GATConv as GATConvDGL, GraphConv, ChebConv as ChebConvDGL, \
    AGNNConv as AGNNConvDGL, APPNPConv
from torch.nn import Dropout, ELU, Sequential, Linear, ReLU
import torch.nn.functional as F
from BGNN import BGNNPredictor
import pandas as pd
import numpy as np
import json 
import os
import networkx as nx
import dgl
from dgl.data.utils import save_graphs, load_graphs

In [185]:
from dgl.nn.pytorch import GATConv as GATConvDGL, GraphConv, ChebConv as ChebConvDGL, \
    AGNNConv as AGNNConvDGL, APPNPConv
from torch.nn import Dropout, ELU, Sequential, Linear, ReLU
import torch.nn.functional as F

In [186]:
class GNNModelDGL(torch.nn.Module):
    """Configurable graph neural network assembled from DGL convolution layers.

    ``name`` selects the architecture: ``'gat'``, ``'gcn'``, ``'cheb'``,
    ``'agnn'`` or ``'appnp'``.

    Args:
        in_dim: width of the node-feature matrix passed to ``forward``.
        hidden_dim: hidden width. For ``'gat'`` it is split over 8 attention
            heads with ``hidden_dim // 8`` features each.
        out_dim: number of outputs per node.
        dropout: dropout probability used by the selected architecture
            (and by the optional MLP).
        name: architecture selector, see above.
        residual: forwarded to the second GAT layer only.
        use_mlp: when true, node features first pass through an MLP.
        join_with_mlp: when true (together with ``use_mlp``), the MLP output
            is concatenated to the raw features instead of replacing them.

    Raises:
        ValueError: if ``name`` is not one of the supported architectures.
    """

    _SUPPORTED_NAMES = ('gat', 'gcn', 'cheb', 'agnn', 'appnp')

    def __init__(self, in_dim, hidden_dim, out_dim,
                 dropout=0., name='gat', residual=True, use_mlp=False, join_with_mlp=False):
        super(GNNModelDGL, self).__init__()
        # Fail at construction time: an unknown name used to build a model
        # without layers that only crashed later inside forward().
        if name not in self._SUPPORTED_NAMES:
            raise ValueError(
                f"Unknown GNN name {name!r}; expected one of {self._SUPPORTED_NAMES}")
        self.name = name
        self.use_mlp = use_mlp
        self.join_with_mlp = join_with_mlp
        # Not read inside this class; kept as a public attribute for external code.
        self.normalize_input_columns = True

        if use_mlp:
            # forward() calls self.mlp, so it has to exist whenever use_mlp is set.
            # Same two-layer shape as the MLP used in front of the APPNP propagation.
            self.mlp = Sequential(Dropout(p=dropout), Linear(in_dim, hidden_dim),
                                  ReLU(), Dropout(p=dropout), Linear(hidden_dim, out_dim))
            # The graph layers receive either [features | mlp(features)] or mlp(features).
            in_dim = in_dim + out_dim if join_with_mlp else out_dim

        if name == 'gat':
            num_heads = 8
            head_dim = hidden_dim // num_heads
            self.l1 = GATConvDGL(in_dim, head_dim, num_heads, feat_drop=dropout, attn_drop=dropout,
                                 residual=False, activation=F.elu)
            # forward() flattens the heads of l1, so l2 sees num_heads * head_dim
            # features; this equals hidden_dim only when hidden_dim is a multiple of 8.
            self.l2 = GATConvDGL(num_heads * head_dim, out_dim, 1, feat_drop=dropout,
                                 attn_drop=dropout, residual=residual, activation=None)
        elif name == 'gcn':
            self.l1 = GraphConv(in_dim, hidden_dim, activation=F.elu)
            self.l2 = GraphConv(hidden_dim, out_dim, activation=F.elu)
            self.drop = Dropout(p=dropout)
        elif name == 'cheb':
            self.l1 = ChebConvDGL(in_dim, hidden_dim, k=3)
            self.l2 = ChebConvDGL(hidden_dim, out_dim, k=3)
            self.drop = Dropout(p=dropout)
        elif name == 'agnn':
            self.lin1 = Sequential(Dropout(p=dropout), Linear(in_dim, hidden_dim), ELU())
            self.l1 = AGNNConvDGL(learn_beta=False)
            self.l2 = AGNNConvDGL(learn_beta=True)
            self.lin2 = Sequential(Dropout(p=dropout), Linear(hidden_dim, out_dim), ELU())
        elif name == 'appnp':
            self.lin1 = Sequential(Dropout(p=dropout), Linear(in_dim, hidden_dim),
                                   ReLU(), Dropout(p=dropout), Linear(hidden_dim, out_dim))
            self.l1 = APPNPConv(k=10, alpha=0.1, edge_drop=0.)

    def forward(self, graph, features):
        """Run the selected architecture and return per-node outputs.

        Args:
            graph: graph object handed to every DGL layer.
            features: node-feature tensor; concatenation along dim 1 implies
                one row per node.

        Returns:
            The output of the last layer of the selected architecture.

        Raises:
            ValueError: if ``self.name`` was changed to an unsupported value
                after construction.
        """
        h = features
        if self.use_mlp:
            if self.join_with_mlp:
                h = torch.cat((h, self.mlp(features)), 1)
            else:
                h = self.mlp(features)
        if self.name == 'gat':
            # Merge the per-head outputs of l1 into one feature axis, then
            # average over the (single) head axis of l2.
            h = self.l1(graph, h).flatten(1)
            logits = self.l2(graph, h).mean(1)
        elif self.name in ['appnp']:
            h = self.lin1(h)
            logits = self.l1(graph, h)
        elif self.name == 'agnn':
            h = self.lin1(h)
            h = self.l1(graph, h)
            h = self.l2(graph, h)
            logits = self.lin2(h)
        elif self.name in ['gcn', 'cheb']:
            h = self.drop(h)
            h = self.l1(graph, h)
            logits = self.l2(graph, h)
        else:
            raise ValueError(f"Unknown GNN name {self.name!r}")

        return logits


In [309]:
# Dataset directory. Other cells read graph.graphml, X.csv, y.csv, masks.json and
# (optionally) cat_features.txt from it, and write/read graph.dgl there.
input_folder = 'datasets/house_class'

In [291]:
# Convert the GraphML graph of the dataset into a DGL graph saved as graph.dgl.
networkx_graph = nx.read_graphml(f'{input_folder}/graph.graphml')

# Replace the string labels "0", "1", ... with the matching integers.
string_to_int = {str(idx): idx for idx in range(networkx_graph.number_of_nodes())}
networkx_graph = nx.relabel_nodes(networkx_graph, string_to_int)

g = dgl.from_networkx(networkx_graph)
save_graphs(f'{input_folder}/graph.dgl', g)

In [310]:
# Node features and targets.
X = pd.read_csv(f'{input_folder}/X.csv')
y = pd.read_csv(f'{input_folder}/y.csv')

# Optional list of categorical column names, one per non-blank line.
cat_features_path = f'{input_folder}/cat_features.txt'
categorical_columns = []
if os.path.exists(cat_features_path):
    with open(cat_features_path) as f:
        categorical_columns = [line.strip() for line in f if line.strip()]

# cat_features holds the positional indices of the categorical columns in X
# (or stays None when there are none); those columns are cast to str.
cat_features = None
if categorical_columns:
    columns = X.columns
    cat_features = np.where(columns.isin(categorical_columns))[0]

    for col in list(columns[cat_features]):
        X[col] = X[col].astype(str)

# The saved file holds a list of graphs; only the first one is used.
gs, _ = load_graphs(f'{input_folder}/graph.dgl')
graph = gs[0]

with open(f'{input_folder}/masks.json') as f:
    masks = json.load(f)

# Use the split stored under key '0' (presumably the first of several splits - confirm).
train_mask, val_mask, test_mask = (masks['0'][part] for part in ('train', 'val', 'test'))


In [314]:
# Hyper-parameters shared by the training cells.
task = 'classification'
hidden_dim = 128
trees_per_epoch = 5
backprop_per_epoch = 5
lr = 0.1
append_gbdt_pred = False
train_input_features = False
gbdt_depth = 6
gbdt_lr = 0.1

# Output width: one column per target for regression, otherwise the number of
# distinct labels found in the first target column of the test split.
# NOTE(review): a class that is absent from the test split would not be counted -
# confirm this matches what BGNNPredictor expects before changing it.
if task == 'regression':
    out_dim = y.shape[1]
else:
    out_dim = len(set(y.iloc[test_mask, 0]))

# Input width of the GNN: out_dim, plus the raw feature count when
# append_gbdt_pred is enabled.
in_dim = out_dim
if append_gbdt_pred:
    in_dim = out_dim + X.shape[1]

gnn_model = GNNModelDGL(in_dim, hidden_dim, out_dim)


In [294]:
import importlib
import sys

import BGNN

# Reload the BGNN module and re-import the class so this cell uses the
# reloaded definition of BGNNPredictor.
importlib.reload(sys.modules['BGNN'])
from BGNN import BGNNPredictor

# NOTE(review): gnn_model is not re-created in this cell; if another training
# cell already fitted it, this run presumably starts from those weights -
# confirm whether BGNNPredictor resets the model.
predictor_options = {
    'task': task,
    'loss_fn': None,
    'trees_per_epoch': trees_per_epoch,
    'backprop_per_epoch': backprop_per_epoch,
    'lr': lr,
    'append_gbdt_pred': append_gbdt_pred,
    'train_input_features': train_input_features,
    'gbdt_depth': gbdt_depth,
    'gbdt_lr': gbdt_lr,
}
bgnn = BGNNPredictor(gnn_model, **predictor_options)

metrics = bgnn.fit(
    graph, X, y, train_mask, val_mask, test_mask, cat_features,
    num_epochs=100, patience=10, metric_name='loss', normalize_features=True,
)

# NOTE(review): the title says 'Avazu' while input_folder in this notebook is
# 'datasets/house_class' - confirm which dataset this plot belongs to.
bgnn.plot_interactive(metrics, legend=['train', 'valid', 'test'], title='Avazu', metric_name='loss')

Epoch 00014 | Loss 0.140 | Loss 0.119/0.123/0.113 | Time 0.0807:  14%|█▍        | 14/100 [00:01<00:08, 10.54it/s]

Best loss at iteration 3: 0.103/0.108/0.099





In [315]:
import importlib
import sys

import BGNN

# Reload the BGNN module and re-import the class so this cell uses the
# reloaded definition of BGNNPredictor.
importlib.reload(sys.modules['BGNN'])
from BGNN import BGNNPredictor

# NOTE(review): gnn_model is not re-created in this cell; if another training
# cell already fitted it, this run presumably starts from those weights -
# confirm whether BGNNPredictor resets the model.
predictor_options = {
    'task': task,
    'loss_fn': None,
    'trees_per_epoch': trees_per_epoch,
    'backprop_per_epoch': backprop_per_epoch,
    'lr': lr,
    'append_gbdt_pred': append_gbdt_pred,
    'train_input_features': train_input_features,
    'gbdt_depth': gbdt_depth,
    'gbdt_lr': gbdt_lr,
}
bgnn = BGNNPredictor(gnn_model, **predictor_options)

metrics = bgnn.fit(
    graph, X, y, train_mask, val_mask, test_mask, cat_features,
    num_epochs=100, patience=10, metric_name='loss', normalize_features=True,
)

# Predictions for the nodes selected by test_mask.
preds = bgnn.predict(graph, X, test_mask)

Epoch 00011 | Loss 0.065 | Loss 0.044/4.107/4.172 | Time 0.1813:  11%|█         | 11/100 [00:02<00:17,  4.96it/s]


Best loss at iteration 0: 0.826/1.113/1.088


In [316]:
# Display the predictions that bgnn.predict returned for test_mask.
preds

tensor([1, 1, 3,  ..., 3, 4, 1], device='cuda:0')

In [163]:
from BGNN import BGNNPredictor

In [None]:
# Bare expression: displays the imported BGNNPredictor class object.
BGNNPredictor