In [1]:
import torch_geometric
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

cora_dataset = Planetoid('../data/Cora',"Cora",transform=T.NormalizeFeatures())[0]

import torch
from pygod.generator import gen_contextual_outliers,gen_structural_outliers

cora_dataset, yc = gen_contextual_outliers(cora_dataset,n=100,k=50)
cora_dataset, ys = gen_structural_outliers(cora_dataset,m=10,n=10)

cora_dataset.y = yc.logical_or(ys).to(torch.long)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Simplenet(nn.Module):
    
    def __init__(self,in_feat,h_feat,num_classes):
        super(Simplenet,self).__init__()

        self.conv1 = GCNConv(in_feat,h_feat)
        self.conv2 = GCNConv(h_feat,num_classes)

    def forward(self,data):
        feature, edges = data.x,data.edge_index
        feature = F.relu(self.conv1(feature,edges))
        feature = F.dropout(feature,training = self.training)
        feature = self.conv2(feature,edges)
        return feature

num_classes = cora_dataset.y.unique().size()[0]
model = Simplenet(cora_dataset.num_features,16,num_classes)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


Simplenet(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 2)
)

In [19]:
model.train()
epochs = 200
lr = 1e-2
# weight decay is similar as L2 normalize, what different is weight decay matter in update strategy.
optimizer = torch.optim.Adam(model.parameters(),lr = lr,weight_decay=1e-3)

cora_dataset.x = cora_dataset.x.to(device)
cora_dataset.edge_index = cora_dataset.edge_index.to(device)
cora_dataset.y = cora_dataset.y.to(device)
rate = (1-cora_dataset.y).sum()/cora_dataset.y.sum()
for i in range(epochs):
    model.zero_grad()
    logit = model(cora_dataset)
    loss = F.cross_entropy(logit[cora_dataset.train_mask],cora_dataset.y[cora_dataset.train_mask],weight=torch.tensor([1.0,rate]).to(device))
    loss.backward()
    optimizer.step()
    print (f"epoch {i}/{epochs}: loss {loss};")

epoch 0/200: loss 0.033317968249320984;
epoch 1/200: loss 0.01973869651556015;
epoch 2/200: loss 0.015788733959197998;
epoch 3/200: loss 0.025798728689551353;
epoch 4/200: loss 0.0251594427973032;
epoch 5/200: loss 0.01944752410054207;
epoch 6/200: loss 0.02333417721092701;
epoch 7/200: loss 0.03221406415104866;
epoch 8/200: loss 0.03539278730750084;
epoch 9/200: loss 0.032997988164424896;
epoch 10/200: loss 0.031322378665208817;
epoch 11/200: loss 0.029849069193005562;
epoch 12/200: loss 0.03398548811674118;
epoch 13/200: loss 0.033562492579221725;
epoch 14/200: loss 0.030403638258576393;
epoch 15/200: loss 0.03825858235359192;
epoch 16/200: loss 0.030433187261223793;
epoch 17/200: loss 0.03082474321126938;
epoch 18/200: loss 0.03312817960977554;
epoch 19/200: loss 0.0318792350590229;
epoch 20/200: loss 0.028531739488244057;
epoch 21/200: loss 0.03930153697729111;
epoch 22/200: loss 0.034282829612493515;
epoch 23/200: loss 0.02836693823337555;
epoch 24/200: loss 0.03299897536635399;
e

In [20]:
import torch
model.eval()

logits = model(cora_dataset)
probs = F.log_softmax(logits,dim=1)
y_hat = probs.max(1)[1][cora_dataset.test_mask]
correct_rate = cora_dataset.y[cora_dataset.test_mask].eq(y_hat).sum()/cora_dataset.test_mask.sum()*100
print (f"Correct rate is {correct_rate}%")

Correct rate is 92.30000305175781%


In [23]:
# 200/2300
from sklearn.metrics import f1_score,roc_auc_score,accuracy_score,recall_score,precision_score,confusion_matrix,roc_curve

pred = y_hat.cpu()
target = cora_dataset.y[cora_dataset.test_mask].cpu()

print (accuracy_score(target, pred))
print (precision_score(target, pred))
print (recall_score(target, pred))

print (f1_score(target, pred))

print (roc_auc_score(target, F.softmax(logits,dim=1)[cora_dataset.test_mask].cpu().detach().numpy()[:,1]))
print (roc_curve(target, probs[cora_dataset.test_mask].cpu().detach().numpy()[:,0]))

print (confusion_matrix(target, pred))

ValueError: Found input variables with inconsistent numbers of samples: [140, 1000]