In [None]:
from collections import defaultdict

import numpy as np
import seaborn as sns
import torch
import torch.nn.functional as F
from torch.optim import Adam

from data import PPRDataset, HeatDataset, set_train_val_test_split
from models import GraphConvNet
from util import train_seeds, test_seeds

### Define parameters

In [None]:
# Kernel identifier for this run. NOTE(review): nothing else in the visible
# notebook reads this name — confirm whether it is still needed.
gdc_kernel = 'ppr'

# Dataset configuration; these are the keyword values PPRDataset is
# constructed with.
dataset_params = dict(
    name='Cora',
    use_lcc=True,
    alpha=0.05,
    t_matrix='sym',
    self_loops=1.0,
    k=128,
    eps=None,
    sparse_normalization='sym_one',
)

### Load dataset

In [None]:
# Build both datasets from the shared `dataset_params` dict so the
# configuration lives in one place instead of being repeated as literals.
cora_ppr = PPRDataset(**dataset_params)

# HeatDataset is constructed with `t` in place of `alpha`; every other
# setting is shared with the PPR dataset.
heat_params = {
    key: value for key, value in dataset_params.items() if key != 'alpha'
}
cora_heat = HeatDataset(t=5, **heat_params)

### Create model

In [None]:
# Bind `dataset` before building the model: GraphConvNet receives it as its
# first argument, so the name must already exist when this cell runs on a
# fresh kernel (Restart Kernel -> Run All).
dataset = cora_ppr

model = GraphConvNet(
    dataset,
    layer_type='GCN',
    hidden=64,
    dropout=0.5
)

### Train model

In [None]:
# Run mode and the dataset the training loop operates on.
test = False
dataset = cora_ppr

# Seed list depends on the run mode: test_seeds when `test` is set,
# train_seeds otherwise.
seeds = train_seeds if not test else test_seeds

# Early-stopping patience and the hard cap on epochs per seed.
patience = 100
max_epochs = 10000

# Two parameter groups so that weight decay is applied only to
# `model.reg_params`; `model.non_reg_params` are left undecayed.
optimizer = Adam(
    [
        dict(params=model.non_reg_params, weight_decay=0),
        dict(params=model.reg_params, weight_decay=0.05),
    ],
    lr=0.01,
)

In [None]:
def train_semi_sup(model, optimizer, data):
    """Perform a single optimisation step on the training nodes.

    Args:
        model: Callable module invoked as ``model(data)``; its output is
            indexed by ``data.train_mask`` and fed to ``F.nll_loss``.
        optimizer: Optimizer whose gradients are cleared before the forward
            pass and which is stepped once after the backward pass.
        data: Object exposing ``y`` (targets) and ``train_mask``.

    Returns:
        The training loss of this step as a Python float.
    """
    # Switch to training mode and discard gradients from the previous step.
    model.train()
    optimizer.zero_grad()

    out = model(data)
    train_mask = data.train_mask

    # The loss is computed only over the entries selected by the train mask.
    loss = F.nll_loss(out[train_mask], data.y[train_mask])

    loss.backward()
    optimizer.step()
    return loss.item()

In [None]:
def eval_semi_sup(model, data, test, num_classes=None):
    """Compute accuracy of ``model`` on the masked node subsets of ``data``.

    Args:
        model: Callable module invoked as ``model(data)``; the arg-max over
            dimension 1 of its output is taken as the prediction.
        data: Object supporting both ``data['<split>_mask']`` lookup and a
            ``y`` attribute holding the targets.
        test: When truthy, the ``test`` split is evaluated in addition to
            ``train`` and ``val``.
        num_classes: Accepted for interface compatibility; not used by the
            computation, hence optional.

    Returns:
        Dict mapping ``'<split>_acc'`` to the accuracy (float) on that split.
        ``'val_acc'`` is always present so that callers can early-stop on it
        in test mode as well.
    """
    model.eval()
    # Evaluation only: no gradients are needed for the forward pass.
    with torch.no_grad():
        logits = model(data)

    # 'val' is evaluated in both modes; 'test' is added only on request.
    keys = ['train', 'val', 'test'] if test else ['train', 'val']

    eval_dict = {}
    for key in keys:
        mask = data[f'{key}_mask']
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        eval_dict[f'{key}_acc'] = acc
    return eval_dict

In [None]:
# Best metrics collected per seed: each key maps to a list with one entry
# per seed.
best_dict = defaultdict(list)

# eval_semi_sup takes a num_classes argument; derive it from the label tensor.
num_classes = int(dataset.data.y.max()) + 1

# NOTE(review): `model` and `optimizer` are created once and shared by every
# seed, so weights and optimizer state carry over from one split to the
# next — confirm whether the model should be re-initialised per seed.
for seed in seeds:
    set_train_val_test_split(
        seed=seed,
        data=dataset.data
    )
    # Best result seen so far for this seed; starts at 0 validation accuracy.
    tmp_dict = {'val_acc': 0}
    patience_cnt = 0
    for epoch in range(1, max_epochs + 1):
        # Early stopping: give up after `patience` epochs without improvement.
        if patience_cnt == patience:
            break
        train_loss = train_semi_sup(
            model,
            optimizer,
            dataset.data
        )
        eval_dict = eval_semi_sup(
            model,
            dataset.data,
            test,
            num_classes
        )
        # A tie with the current best counts as "no improvement".
        if eval_dict['val_acc'] <= tmp_dict['val_acc']:
            patience_cnt += 1
        else:
            patience_cnt = 0
            tmp_dict['epoch'] = epoch
            tmp_dict.update(eval_dict)
    for k, v in tmp_dict.items():
        best_dict[k].append(v)

In [None]:
# Bootstrap the mean of the per-seed best validation accuracies
# (1000 resamples) and store the bounds returned by `sns.utils.ci` for a
# width of 95 under 'acc_ci'.
boots_series = sns.algorithms.bootstrap(best_dict['val_acc'], func=np.mean, n_boot=1000)
ci_low, ci_high = sns.utils.ci(boots_series, 95)
best_dict['acc_ci'] = [ci_low, ci_high]