In [17]:
import time
import random
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torch_geometric.datasets import CitationFull
import torch.nn.functional as F
from sklearn.metrics import f1_score

In [2]:
Citeseer=CitationFull("../data","Citeseer")

In [3]:
X=np.load("./CiteSeer.npy")

In [4]:
class Dense(nn.Module):

    def __init__(self, in_features, out_features, bias='none'):
        super(Dense, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        if bias == 'bn':
            self.bias = nn.BatchNorm1d(out_features)
        else:
            self.bias = lambda x: x
            
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / np.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

    def forward(self, input):
        output = torch.mm(input, self.weight)
        output = self.bias(output)
        # 跳层连接
        if self.in_features == self.out_features:#??
            output = output + input
        return output

class GnnBP(nn.Module):
    def __init__(self, nfeat, nlayers,nhidden, nclass, dropout, bias):
        super(GnnBP, self).__init__()
        self.fcs = nn.ModuleList()
        self.fcs.append(Dense(nfeat, nhidden, bias))
        for _ in range(nlayers-2):
            self.fcs.append(Dense(nhidden, nhidden, bias))
        self.fcs.append(Dense(nhidden, nclass))
        self.act_fn = nn.ReLU()
        self.dropout = dropout

    def forward(self, x):
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.act_fn(self.fcs[0](x))
        for fc in self.fcs[1:-1]:
            x = F.dropout(x, self.dropout, training=self.training)
            x = self.act_fn(fc(x))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.fcs[-1](x)
        return x

In [5]:
model = GnnBP(nfeat=X.shape[1],
            nlayers=2,
            nhidden=64,
            nclass=6,
            dropout=0.5,
            bias = None)

In [6]:
Vt=[]
yset=set([i.item() for i in Citeseer.data.y])
for y in yset:
    for i in range(40):
        t=np.random.randint(0,X.shape[0])
        while(Citeseer.data.y[t].item()!=y):
            t=np.random.randint(0,X.shape[0])
        Vt.append(t)
Vv=[i for i in range(X.shape[0]) if i not in Vt]
trainX=X[Vt]
testX=X[Vv]
trainY=Citeseer.data.y[Vt]
testY=Citeseer.data.y[Vv]

In [9]:
optimizer = optim.Adam(model.parameters(), lr=0.01, 
    weight_decay=5e-4)
loss_fn = nn.CrossEntropyLoss()

In [10]:
def train():
    model.train()
    loss_list = []
    time_epoch = 0
    for step, (batch_x, batch_y) in enumerate(loader):
        t1 = time.time()
        optimizer.zero_grad()
        output = model(batch_x)
        loss_train = loss_fn(output, batch_y)
        loss_train.backward()
        optimizer.step()
        time_epoch+=(time.time()-t1)
        loss_list.append(loss_train.item())
    return np.mean(loss_list),time_epoch

In [11]:
def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    correct = correct.sum()
    return correct / len(labels)

def muticlass_f1(output, labels):
    preds = output.max(1)[1]
    preds = preds.cpu().detach().numpy()
    labels = labels.cpu().detach().numpy()
    micro = f1_score(labels, preds, average='micro')
    return micro

def mutilabel_f1(y_true, y_pred):
    y_pred[y_pred > 0] = 1
    y_pred[y_pred <= 0] = 0
    return f1_score(y_true, y_pred, average="micro")

In [15]:
def test():
    model.eval()
    with torch.no_grad():
        output = model(torch.Tensor(testX))
        micro_val = muticlass_f1(output, testY)
        return micro_val.item()

In [22]:
def getVt(size,nV,y):
    Vt=[]
    yset=set([i.item() for i in Citeseer.data.y])
    for y in yset:
        for i in range(size):
            t=np.random.randint(0,X.shape[0])
            while(Citeseer.data.y[t].item()!=y):
                t=np.random.randint(0,X.shape[0])
            Vt.append(t)
    return Vt

In [23]:
for size in range(5,80,5):
    Vt=getVt(size,Citeseer.data.num_nodes,Citeseer.data.y)
    Vv=[i for i in range(X.shape[0]) if i not in Vt]
    trainX=X[Vt]
    testX=X[Vv]
    trainY=Citeseer.data.y[Vt]
    testY=Citeseer.data.y[Vv]


    torch_dataset = Data.TensorDataset(torch.Tensor(trainX), trainY)
    loader = Data.DataLoader(dataset=torch_dataset,
                            batch_size=64,
                            shuffle=True,
                            num_workers=0)

    train_time=0
    for epoch in range(50):
        loss_tra,train_ep = train()
        f1_val = test()
        train_time+=train_ep
        if(epoch+1)%50 == 0: 
                print('Epoch:{:04d}'.format(epoch+1),
                    'train',
                    'loss:{:.3f}'.format(loss_tra),
                    '| val',
                    'acc:{:.3f}'.format(f1_val),
                    '| cost{:.3f}'.format(train_time))

Epoch:0050 train loss:0.029 | val acc:0.861 | cost0.089
Epoch:0050 train loss:0.160 | val acc:0.866 | cost0.150
Epoch:0050 train loss:0.113 | val acc:0.848 | cost0.196
Epoch:0050 train loss:0.229 | val acc:0.866 | cost0.273
Epoch:0050 train loss:0.194 | val acc:0.845 | cost0.328
Epoch:0050 train loss:0.159 | val acc:0.895 | cost0.271
Epoch:0050 train loss:0.265 | val acc:0.872 | cost0.417
Epoch:0050 train loss:0.163 | val acc:0.876 | cost0.312
Epoch:0050 train loss:0.316 | val acc:0.893 | cost0.405
Epoch:0050 train loss:0.194 | val acc:0.873 | cost0.389
Epoch:0050 train loss:0.225 | val acc:0.881 | cost0.556
Epoch:0050 train loss:0.226 | val acc:0.896 | cost0.518
Epoch:0050 train loss:0.248 | val acc:0.886 | cost0.647
Epoch:0050 train loss:0.202 | val acc:0.887 | cost0.585
Epoch:0050 train loss:0.240 | val acc:0.894 | cost0.687
