In [1]:
from platform import python_version

# Print the version of the running Python interpreter.
print(python_version())


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import scipy.sparse as sp
import torch
import torch_geometric
from torch_geometric.datasets import Planetoid

from conformal_lp.src import process, conformal_link_prediction
from conformal_lp.models import SEAL_wrapper, CommonNeighbors
from conformal_lp.gnn import GIN
from conformal_lp.utils import make_loaders_SEAL, get_fdp

3.10.8


> The input data must be a PyTorch Geometric `Dataset`.

In [2]:
# Load the Cora graph from the Planetoid collection, using ./data/ as the dataset root.
dataset = Planetoid(
    root="./data/",
    name="Cora",
)

> Split the data into train / test / calibration / validation samples

In [3]:
# Settings forwarded to `process` in the call below.
val_ratio = 0.0
test_ratio = 0.1
calib_size = 1000
directed = False

# `data` and `split_edge` are consumed further down by both the SEAL loaders
# and the CommonNeighbors model.
data, split_edge = process(
    dataset,
    calib_size=calib_size,
    val_ratio=val_ratio,
    test_ratio=test_ratio,
    directed=directed,
)

> To apply the method, we use `conformal_link_prediction(train_loader, test_loader, calib_loader, val_loader, model, level)`, where 

> * `model`: has a `.train()` method taking as input a `train_loader` and a `val_loader`, and a `.test()` method taking as input a `test_loader` and returning the tuple (scores for true test edges, scores for false test edges)
> * the format of `train_loader`/`val_loader`/`test_loader`/`calib_loader` depends on `model`, see code. 

SEAL

In [4]:
# SEAL settings; `use_feature` is consumed later, when the GIN network is built.
num_hops = 2
use_feature = True

# Path handed to make_loaders_SEAL: the dataset root with a "_seal" suffix.
path = f"{dataset.root}_seal"

loaders = make_loaders_SEAL(
    path,
    data,
    split_edge,
    num_hops=num_hops,
    directed=directed,
)
train_loader, test_loader, calib_loader, val_loader = loaders



In [5]:
# The node-feature dimension is only looked up when features are used.
if use_feature:
    num_features = dataset[0].x.shape[1]
else:
    num_features = None

net = GIN(
    hidden_channels=32,
    num_layers=3,
    max_z=1000,
    num_features=num_features,
    use_feature=use_feature,
    node_embedding=None,
)
model = SEAL_wrapper(model=net, use_feature=use_feature, num_epochs=10, lr=1e-3)

# Run the conformal procedure with the SEAL model at the chosen level.
level = 0.2
rej_set, test_labels = conformal_link_prediction(
    train_loader,
    test_loader,
    calib_loader,
    val_loader,
    model,
    level,
)

Control is at level $\alpha \pi_0$, where $\alpha$ is the `level` passed above and $\pi_0$ is the proportion of nulls in the test sample.

In [6]:
# Evaluate the rejection set against the test labels and report both values.
fdp, tdp = get_fdp(test_labels, rej_set)
print("fdp", fdp)
print("tdp", tdp)

fdp 0.09853249475890985
tdp 0.8159392789373814


CN (Common Neighbors)

In [4]:
# CommonNeighbors is built directly from the graph and the edge split. Here the
# test/calibration "loaders" are the strings 'test' and 'calib', and no
# train or validation loader is supplied.
model = CommonNeighbors(data=data, split_edge=split_edge)

level = 0.2
rej_set, test_labels = conformal_link_prediction(
    train_loader=None,
    test_loader="test",
    calib_loader="calib",
    val_loader=None,
    model=model,
    level=level,
)

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 177.03it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 249.02it/s]
100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 229.52it/s]


In [5]:
# Evaluate the rejection set against the test labels and report both values.
fdp, tdp = get_fdp(test_labels, rej_set)
print("fdp", fdp)
print("tdp", tdp)

fdp 0.011904761904761904
tdp 0.47248576850094876
