# Model Training
Here we will build and train our model using the graphs we previously built.

In [1]:
import numpy as np
import pandas as pd
import torch
import dgl
import scipy
import networkx as nx

from dgl.data.utils import save_graphs, load_graphs, split_dataset

import dgl.nn as dglnn
import torch.nn as nn
import torch.nn.functional as F

Using backend: pytorch


## Load Data

In [2]:
# Load the graphs saved earlier: the first entry of the list is bound as the
# unsupervised graph, the second as the supervised graph.
glist, label_dict = load_graphs("./data_final.bin")
unsup_graph = glist[0]
sup_graph = glist[1]

## Prepare Data for Training

In [3]:
# Give every node exactly one self-loop in both graphs.
# add_self_loop adds a loop to every node even when one already exists, so any
# existing loops are removed first. Because the names are rebound in place,
# this also keeps the cell idempotent when it is re-run on the same kernel.
sup_graph = dgl.add_self_loop(dgl.remove_self_loop(sup_graph))
unsup_graph = dgl.add_self_loop(dgl.remove_self_loop(unsup_graph))

In [37]:
# Shuffle and split each graph into subsets. No frac_list is passed, so
# split_dataset's default fractions apply; the fixed random_state keeps the
# shuffled split reproducible across runs.
sup_split = split_dataset(sup_graph, shuffle=True, random_state=10)
# Fixed: this line previously split sup_graph a second time, so the
# unsupervised graph was never partitioned.
unsup_split = split_dataset(unsup_graph, shuffle=True, random_state=10)



In [38]:
# NOTE(review): the saved output under this cell is a list of three Subset
# objects, which is not what len() returns -- presumably the cell was edited
# after it last ran; re-execute to refresh the output.
len(sup_graph)

[<dgl.data.utils.Subset at 0x149004b9bba8>,
 <dgl.data.utils.Subset at 0x149004b9b0f0>,
 <dgl.data.utils.Subset at 0x149004b9b780>]

In [22]:
# Define the model inputs and read the train/val/test masks stored on the graph.
# `graph` was not bound by any earlier cell, so this cell raised NameError on a
# fresh kernel.
# NOTE(review): sup_graph is assumed to be the graph carrying 'label' and the
# mask fields -- confirm before relying on this binding.
graph = sup_graph
node_features = graph.ndata['feat']
node_labels = graph.ndata['label']
train_mask = graph.ndata['train_mask']
valid_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
# Feature width is the second dimension of the node feature tensor.
n_features = node_features.shape[1]
# Class count is taken as (largest label value + 1); assumes labels are
# 0-based class ids -- TODO confirm.
n_labels = int(node_labels.max().item() + 1)

tensor(1.)

## Build Model

In [26]:
class SAGE(nn.Module):
    """Two-layer GraphSAGE network.

    The first ``SAGEConv`` layer uses the 'pool' aggregator and maps
    ``in_feats`` to ``hid_feats``; the second uses the 'mean' aggregator and
    maps ``hid_feats`` to ``out_feats``. A ReLU sits between the two layers.
    """

    def __init__(self, in_feats, hid_feats, out_feats):
        """Create the two convolution layers.

        Args:
            in_feats: size of the input node features.
            hid_feats: size of the hidden representation.
            out_feats: size of the output produced per node.
        """
        super().__init__()
        self.conv1 = dglnn.SAGEConv(in_feats, hid_feats, aggregator_type='pool')
        self.conv2 = dglnn.SAGEConv(hid_feats, out_feats, aggregator_type='mean')

    def forward(self, graph, inputs):
        """Run both layers on ``graph`` using ``inputs`` as the node features.

        Returns the output of the second convolution (no activation applied).
        """
        hidden = F.relu(self.conv1(graph, inputs))
        return self.conv2(graph, hidden)

In [27]:
class GCN(torch.nn.Module):
    """Multi-layer graph convolutional network built from ``GraphConv`` layers.

    The stack is: one input layer (``in_feats`` -> ``n_hidden``),
    ``n_layers - 1`` hidden layers (``n_hidden`` -> ``n_hidden``), and one
    output layer (``n_hidden`` -> ``n_classes``). The input and hidden layers
    receive ``activation``; the output layer is created without one.
    Dropout is applied to the input of every layer except the first.
    """

    def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
        """Build the layer stack.

        Args:
            in_feats: size of the input node features.
            n_hidden: size of every hidden representation.
            n_classes: size of the output produced per node.
            n_layers: controls depth; ``n_layers - 1`` hidden layers are
                added between the input and output layers.
            activation: callable passed to the input and hidden layers.
            dropout: probability handed to ``torch.nn.Dropout``.
        """
        super().__init__()
        # GraphConv is reached through the `dglnn` alias: the bare name
        # `GraphConv` is never imported in this notebook, so referencing it
        # directly raised NameError when the model was instantiated.
        self.layers = torch.nn.ModuleList()
        # input layer
        self.layers.append(dglnn.GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for _ in range(n_layers - 1):
            self.layers.append(dglnn.GraphConv(n_hidden, n_hidden, activation=activation))
        # output layer (no activation)
        self.layers.append(dglnn.GraphConv(n_hidden, n_classes))
        self.dropout = torch.nn.Dropout(p=dropout)

    def forward(self, g, inputs=None):
        """Run the layer stack on graph ``g``.

        Args:
            g: graph passed to every layer.
            inputs: node features to start from. When omitted, the features
                stored under ``g.ndata['vec']`` are used, which is the
                original behaviour of this method.

        Returns:
            The output of the last layer.
        """
        h = g.ndata['vec'] if inputs is None else inputs
        for i, layer in enumerate(self.layers):
            # No dropout on the raw input features, only between layers.
            if i != 0:
                h = self.dropout(h)
            h = layer(g, h)
        return h

Help on class SAGEConv in module dgl.nn.pytorch.conv.sageconv:

class SAGEConv(torch.nn.modules.module.Module)
 |  Description
 |  -----------
 |  GraphSAGE layer from paper `Inductive Representation Learning on
 |  Large Graphs <https://arxiv.org/pdf/1706.02216.pdf>`__.
 |  
 |  .. math::
 |      h_{\mathcal{N}(i)}^{(l+1)} &= \mathrm{aggregate}
 |      \left(\{h_{j}^{l}, \forall j \in \mathcal{N}(i) \}\right)
 |  
 |      h_{i}^{(l+1)} &= \sigma \left(W \cdot \mathrm{concat}
 |      (h_{i}^{l}, h_{\mathcal{N}(i)}^{l+1}) \right)
 |  
 |      h_{i}^{(l+1)} &= \mathrm{norm}(h_{i}^{l})
 |  
 |  Parameters
 |  ----------
 |  in_feats : int, or pair of ints
 |      Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`.
 |  
 |      GATConv can be applied on homogeneous graph and unidirectional
 |      `bipartite graph <https://docs.dgl.ai/generated/dgl.bipartite.html?highlight=bipartite>`__.
 |      If the layer applies on a unidirectional bipartite graph, ``in_feats``
 |  