In [1]:
!pip3 install dgl
!pip3 install numpy
!pip3 install torch
!pip3 install networkx
!pip3 install matplotlib

!pip3 install wandb -qU

Collecting dgl
  Downloading dgl-1.1.3-cp310-cp310-manylinux1_x86_64.whl (6.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dgl
Successfully installed dgl-1.1.3
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m196.4/196.4 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.1/254.1 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [4]:
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Dataset

In [None]:
from dgl import save_graphs, load_graphs
from dgl.data.utils import makedirs, save_info, load_info

In [None]:
# Load graph and prepare
dataset, _ = load_graphs('./drive/MyDrive/Academic/Code/Dataset/tfinance/tfinance')
graph = dataset[0]
graph.ndata['label'] = graph.ndata['label'].argmax(1)

graph.ndata['label'] = graph.ndata['label'].long().squeeze(-1)
graph.ndata['feature'] = graph.ndata['feature'].float()

## Model

In [None]:
import dgl
from dgl.nn import GraphConv

import torch
import torch.nn as nn

In [None]:
class Model(nn.Module):
  def __init__(self, in_feats, h_feats, num_classes):
    super(Model, self).__init__()

    # Layer definitions
    self.conv1 = GraphConv(in_feats, h_feats)
    self.conv2 = GraphConv(h_feats, num_classes)

  def forward(self, g, in_feat):
    h = self.conv1(g, in_feat)
    h = F.relu(h)
    h = self.conv2(g, h)
    return h

## Training

In [None]:
import time
import numpy as np
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, recall_score, roc_auc_score, precision_score, confusion_matrix

# threshold adjusting for best macro f1
def get_best_f1(labels, probs):
    best_f1, best_thre = 0, 0
    for thres in np.linspace(0.05, 0.95, 19):
        preds = np.zeros_like(labels)
        preds[probs[:,1] > thres] = 1
        mf1 = f1_score(labels, preds, average='macro')
        if mf1 > best_f1:
            best_f1 = mf1
            best_thre = thres
    return best_f1, best_thre

# Training Method
def train(g, model):
  features = g.ndata['feature']
  labels = g.ndata['label']
  index = list(range(len(labels)))

  # Train Test Split
  idx_train, idx_rest, y_train, y_rest = train_test_split(
      index, labels[index], stratify=labels[index],
      train_size = 0.8, random_state = 7, shuffle=True
  )
  idx_valid, idx_test, y_valid, y_test = train_test_split(
      idx_rest, y_rest, stratify=y_rest,
      test_size = 0.67, random_state = 7, shuffle=True
  )

  train_mask = torch.zeros([len(labels)]).bool()
  val_mask = torch.zeros([len(labels)]).bool()
  test_mask = torch.zeros([len(labels)]).bool()

  train_mask[idx_train] = 1
  val_mask[idx_valid] = 1
  test_mask[idx_test] = 1

  # Optimizer
  optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

  # Inits
  best_f1, final_tf1, final_trec, final_tpre, final_tmf1, final_tauc = 0., 0., 0., 0., 0., 0.
  weight = (1-labels[train_mask]).sum().item() / labels[train_mask].sum().item()
  print('cross entropy weight: ', weight)

  # Main Loop
  time_start = time.time()
  for e in range(50):
      # TRAIN
      model.train()

      logits = model(g, features)
      loss = F.cross_entropy(logits[train_mask], labels[train_mask], weight=torch.tensor([1., weight]))

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      # EVAL
      model.eval()

      probs = logits.softmax(1)
      f1, thres = get_best_f1(labels[val_mask], probs[val_mask])
      preds = numpy.zeros_like(labels)
      preds[probs[:, 1] > thres] = 1

      trec = recall_score(labels[test_mask], preds[test_mask])
      tpre = precision_score(labels[test_mask], preds[test_mask])
      tmf1 = f1_score(labels[test_mask], preds[test_mask], average='macro')
      tauc = roc_auc_score(labels[test_mask], probs[test_mask][:, 1].detach().numpy())

      if best_f1 < f1:
          best_f1 = f1
          final_trec = trec
          final_tpre = tpre
          final_tmf1 = tmf1
          final_tauc = tauc
      print('Epoch {}, loss: {:.4f}, val mf1: {:.4f}, (best {:.4f})'.format(e, loss, f1, best_f1))

  time_end = time.time()
  print('time cost: ', time_end - time_start, 's')
  print('Test: REC {:.2f} PRE {:.2f} MF1 {:.2f} AUC {:.2f}'.format(final_trec*100,
                                                                    final_tpre*100, final_tmf1*100, final_tauc*100))
  return final_tmf1, final_tauc

In [None]:
in_feats = graph.ndata['feature'].shape[1]
h_dim = 16
num_classes = 2

model = Model(in_feats, h_dim, num_classes)
train(graph, model)

cross entropy weight:  20.834257975034674
Epoch 0, loss: 525.5538, val mf1: 0.0438, (best 0.0438)
Epoch 1, loss: 357.7287, val mf1: 0.0438, (best 0.0438)
Epoch 2, loss: 191.4831, val mf1: 0.0447, (best 0.0447)
Epoch 3, loss: 34.7851, val mf1: 0.2976, (best 0.2976)
Epoch 4, loss: 33.2142, val mf1: 0.7296, (best 0.7296)


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 5, loss: 87.6146, val mf1: 0.4883, (best 0.7296)


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 6, loss: 120.9149, val mf1: 0.4883, (best 0.7296)


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 7, loss: 135.3019, val mf1: 0.4883, (best 0.7296)


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 8, loss: 135.0923, val mf1: 0.4883, (best 0.7296)


  _warn_prf(average, modifier, msg_start, len(result))


Epoch 9, loss: 123.5727, val mf1: 0.4883, (best 0.7296)
Epoch 10, loss: 103.2674, val mf1: 0.5048, (best 0.7296)
Epoch 11, loss: 76.1943, val mf1: 0.5582, (best 0.7296)
Epoch 12, loss: 45.4221, val mf1: 0.7249, (best 0.7296)
Epoch 13, loss: 24.6617, val mf1: 0.5962, (best 0.7296)
Epoch 14, loss: 22.6383, val mf1: 0.4782, (best 0.7296)
Epoch 15, loss: 35.0336, val mf1: 0.3590, (best 0.7296)
Epoch 16, loss: 52.2278, val mf1: 0.2543, (best 0.7296)
Epoch 17, loss: 62.9559, val mf1: 0.1590, (best 0.7296)
Epoch 18, loss: 56.5272, val mf1: 0.2258, (best 0.7296)
Epoch 19, loss: 42.8709, val mf1: 0.3072, (best 0.7296)
Epoch 20, loss: 29.9381, val mf1: 0.3973, (best 0.7296)
Epoch 21, loss: 22.5381, val mf1: 0.4782, (best 0.7296)
Epoch 22, loss: 21.3507, val mf1: 0.5341, (best 0.7296)
Epoch 23, loss: 25.8055, val mf1: 0.6228, (best 0.7296)
Epoch 24, loss: 32.0344, val mf1: 0.7226, (best 0.7296)
Epoch 25, loss: 36.3553, val mf1: 0.7535, (best 0.7535)
Epoch 26, loss: 37.1228, val mf1: 0.7515, (best

(0.7370397572623554, 0.8489900540728711)