-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
130 lines (106 loc) · 4.17 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import argparse
import sys
import os, random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.utils import to_undirected
from torch_scatter import scatter
from torch_geometric.data import ShaDowKHopSampler
from logger import Logger
from dataset import *
from data_utils import normalize, gen_normalized_adjs, to_sparse_tensor, \
load_fixed_splits, rand_splits, get_gpu_memory_map, count_parameters, reindex_env
from eval import evaluate_full, eval_acc, eval_rocauc, eval_f1
from parse import parser_add_main_args
from model import *
import time
# NOTE: for consistent data splits, see data_utils.rand_train_test_idx
def fix_seed(seed):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
### Parse args ###
parser = argparse.ArgumentParser(description='General Training Pipeline')
parser_add_main_args(parser)
args = parser.parse_args()
print(args)
fix_seed(args.seed)
if args.cpu:
device = torch.device("cpu")
else:
device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
### Load and preprocess data ###
# multi-graph datasets, divide graphs into train/valid/test
if args.dataset == 'twitch':
dataset = load_twitch_dataset(args.data_dir, train_num=3)
elif args.dataset == 'elliptic':
dataset = load_elliptic_dataset(args.data_dir, train_num=5)
# single-graph datasets, divide nodes into train/valid/test
elif args.dataset == 'arxiv':
dataset = load_arxiv_dataset(args.data_dir, train_num=3)
# synthetic datasets, add spurious node features
elif args.dataset in ('cora', 'citeseer', 'pubmed'):
dataset = load_synthetic_dataset(args.data_dir, args.dataset, train_num=3, combine=args.combine_result)
else:
raise ValueError('Invalid dataname')
if len(dataset.y.shape) == 1:
dataset.y = dataset.y.unsqueeze(1)
c = max(dataset.y.max().item() + 1, dataset.y.shape[1])
d = dataset.x.shape[1]
n = dataset.num_nodes
print(f"dataset {args.dataset}: all nodes {dataset.num_nodes} | edges {dataset.edge_index.size(1)} | "
+ f"classes {c} | feats {d}")
print(f"train nodes {dataset.train_idx.shape[0]} | valid nodes {dataset.valid_idx.shape[0]} | "
f"test in nodes {dataset.test_in_idx.shape[0]}")
m = ""
for i in range(len(dataset.test_ood_idx)):
m += f"test ood{i+1} nodes {dataset.test_ood_idx[i].shape[0]} "
print(m)
print(f'[INFO] env numbers: {dataset.env_num} train env numbers: {dataset.train_env_num}')
### Load method ###
is_multilabel = args.dataset in ('proteins', 'ppi')
model = CaNet(d, c, args, device).to(device)
if args.dataset in ('elliptic', 'twitch'):
criterion = nn.BCEWithLogitsLoss(reduction='mean')
else:
criterion = nn.CrossEntropyLoss(reduction='mean')
if args.dataset in ('twitch'):
eval_func = eval_rocauc
elif args.dataset in ('elliptic'):
eval_func = eval_f1
else:
eval_func = eval_acc
logger = Logger(args.runs, args)
model.train()
print('MODEL:', model)
tr_acc, val_acc = [], []
### Training loop ###
for run in range(args.runs):
model.reset_parameters()
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
best_val = float('-inf')
dataset.x, dataset.y, dataset.edge_index, dataset.env = \
dataset.x.to(device), dataset.y.to(device), dataset.edge_index.to(device), dataset.env.to(device)
for epoch in range(args.epochs):
model.train()
optimizer.zero_grad()
loss = model.loss_compute(dataset, criterion, args)
loss.backward()
optimizer.step()
result = evaluate_full(model, dataset, eval_func)
logger.add_result(run, result)
tr_acc.append(result[0])
val_acc.append(result[2])
if epoch % args.display_step == 0:
m = f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {100 * result[0]:.2f}%, Valid: {100 * result[1]:.2f}%, Test In: {100 * result[2]:.2f}% '
for i in range(len(result)-3):
m += f'Test OOD{i+1}: {100 * result[i+3]:.2f}% '
print(m)
logger.print_statistics(run)
logger.print_statistics()
if args.store_result:
logger.output(args)