In [1]:
import numpy as np
import time
import networkx as nx
import torch
import torch.utils
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import argparse

import heapq as hp

from graph_data import GraphData
from data_reader import DataReader
from models import GNN
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from itertools import chain

from sklearn import preprocessing
#from IPython.core.debugger import Tracer
from torch_geometric.utils import precision, recall, f1_score,true_positive, true_negative, false_positive, false_negative

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment parameters
'''
----------------------------
Dataset  |   batchnorm_dim
----------------------------
MUTAG    |     28
PTC_MR   |     64
BZR      |     57
COX2     |     56
COX2_MD  |     36
BZR-MD   |     33
PROTEINS |    620
D&D      |   5748
'''
# Create the ArgumentParser that holds every tunable setting of the experiment.
parser = argparse.ArgumentParser()

# Each add_argument call below declares one option the run accepts: its type,
# its default value and the help text shown for it.

# Default to CUDA only when a CUDA device is actually available; otherwise fall
# back to the CPU so that model.to(args.device) does not fail on CPU-only hosts.
default_device = 'cuda' if torch.cuda.is_available() else 'cpu'

parser.add_argument('--device', default=default_device, help='Select CPU/CUDA for training.')
parser.add_argument('--dataset', default='Applications', help='Dataset name.')
parser.add_argument('--epochs', type=int, default=10, help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.0025, help='Initial learning rate.')
parser.add_argument('--wdecay', type=float, default=2e-3, help='Weight decay (L2 loss on parameters).')
parser.add_argument('--batch_size', type=int, default=64, help='Batch size.')
parser.add_argument('--hidden_dim', type=int, default=64, help='Number of hidden units.')
parser.add_argument('--n_layers', type=int, default=2, help='Number of MLP layers for GraphSN.')
parser.add_argument('--batchnorm_dim', type=int, default=30, help='Batchnormalization dimension for GraphSN layer.')
parser.add_argument('--dropout_1', type=float, default=0.25, help='Dropout rate for concatenation the outputs.')
parser.add_argument('--dropout_2', type=float, default=0.25, help='Dropout rate for MLP layers in GraphSN.')
parser.add_argument('--n_folds', type=int, default=1, help='Number of folds in cross validation.')
parser.add_argument('--threads', type=int, default=0, help='Number of threads.')
parser.add_argument('--log_interval', type=int, default=10 , help='Log interval for visualizing outputs.')
parser.add_argument('--seed', type=int, default=117, help='Random seed.')

_StoreAction(option_strings=['--seed'], dest='seed', nargs=None, const=None, default=117, type=<class 'int'>, choices=None, help='Random seed.', metavar=None)

In [3]:
# Parse an explicit empty argument list so that every option takes the default
# declared on the parser; nothing is read from the kernel's own sys.argv.
args = parser.parse_args([])

# Seed PyTorch's global random number generator from --seed so that runs are
# repeatable. The trailing ';' keeps the returned Generator out of the cell output.
torch.manual_seed(args.seed);

# Uncomment to inspect the parsed settings.
#print("Device:", args.device)
#print("Dataset:", args.dataset)
#print("Epochs:", args.epochs)
#print("Learning Rate:", args.lr)

In [None]:
print('Loading data')

# The dataset lives in ./data/<dataset name in upper case>/.
dataset_dir = './data/%s/' % args.dataset.upper()
# Dedicated NumPy RandomState seeded from --seed, handed to the reader.
reader_rng = np.random.RandomState(args.seed)

datareader = DataReader(
    data_dir=dataset_dir,
    fold_dir=None,
    rnd_state=reader_rng,
    folds=args.n_folds,
    use_cont_node_attr=False,
)



In [None]:
# Number of graphs in the dataset
dataset_length = len(datareader.data['adj_list'])

# NOTE: each entry of datareader.data['adj_list'] is read at the top of the loop
# and overwritten at the bottom, so running this cell a second time re-weights
# matrices that were already re-weighted. Reload the data before re-running.
for itr in np.arange(dataset_length):
    # Adjacency matrix of the current graph
    A_array = datareader.data['adj_list'][itr]
    # nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
    # replacement for ndarray input.
    G = nx.from_numpy_array(A_array)

    sub_graphs = []
    subgraph_nodes_list = []
    sub_graphs_adj = []
    new_adj = torch.zeros(A_array.shape[0], A_array.shape[0])

    # One subgraph per node i: node i itself plus every node j whose adjacency
    # entry A_array[i][j] is non-zero (any non-zero value counts, not only 1).
    for i in np.arange(len(A_array)):
        # The centre node only needs to be listed once.
        s_indexes = [i]
        for j in np.arange(len(A_array)):
            if A_array[i][j] != 0:
                s_indexes.append(j)
        sub_graphs.append(G.subgraph(s_indexes))

    # Node list of every subgraph
    for sub_graph in sub_graphs:
        subgraph_nodes_list.append(list(sub_graph.nodes))

    # Dense adjacency matrix of every subgraph. The loop below indexes these
    # matrices by a node's position in the matching subgraph_nodes_list entry.
    for sub_graph in sub_graphs:
        sub_graphs_adj.append(nx.adjacency_matrix(sub_graph).toarray())

    # Build the new weighted matrix of this graph from its per-node subgraphs
    for node in np.arange(len(subgraph_nodes_list)):
        sub_adj = sub_graphs_adj[node]
        nodes_list = subgraph_nodes_list[node]
        # Row/column of each node inside sub_adj; replaces repeated list.index scans.
        position = {member: k for k, member in enumerate(nodes_list)}
        for index in nodes_list:
            if index == node:
                continue
            count = torch.tensor(0).float()
            # Nodes present in both the subgraph of `node` and the subgraph of `index`
            c_neighbors = set(nodes_list).intersection(subgraph_nodes_list[index])
            if index in c_neighbors:
                c_neighbors_list = list(c_neighbors)
                # Sum the entries of sub_adj whose row and column both belong
                # to the common nodes.
                for i, item1 in enumerate(nodes_list):
                    if item1 in c_neighbors:
                        for item2 in c_neighbors_list:
                            count += sub_adj[i][position[item2]]

            # Kept as three separate float32 updates so results match the
            # original step for step: halve the sum, divide by k*(k-1), then
            # multiply by k**2, where k = len(c_neighbors).
            new_adj[node][index] = count / 2
            new_adj[node][index] = new_adj[node][index]/(len(c_neighbors)*(len(c_neighbors)-1))
            new_adj[node][index] = new_adj[node][index] * (len(c_neighbors) ** 2)

    weight = torch.FloatTensor(new_adj)
    # Row-normalise. A zero row sum gives non-finite values here; they are
    # replaced by np.nan_to_num at the end of the iteration.
    weight = weight / weight.sum(1, keepdim=True)

    # Add the original adjacency matrix
    weight = weight + torch.FloatTensor(A_array)

    # Add each row's sum onto the diagonal
    coeff = weight.sum(1, keepdim=True)
    coeff = torch.diag((coeff.T)[0])

    weight = weight + coeff

    weight = weight.detach().numpy()
    #weight = np.nan_to_num(weight, nan=0)
    weight = np.nan_to_num(weight)

    # Replace the stored adjacency matrix with the re-weighted one
    datareader.data['adj_list'][itr] = weight

In [None]:
acc_folds = []
# One row per fold and one column per epoch. Sizing the first axis by
# args.n_folds keeps accuracy_arr[fold_id][epoch] in bounds when --n_folds > 1.
accuracy_arr = np.zeros((args.n_folds, args.epochs), dtype=float)
for fold_id in range(args.n_folds):
    print('\nFOLD', fold_id)

    # loaders[0] is the training loader, loaders[1] the test loader.
    loaders = []
    for split in ['train', 'test']:
        gdata = GraphData(fold_id=fold_id,
                             datareader=datareader,
                             split=split)

        loader = torch.utils.data.DataLoader(gdata,
                                             batch_size=args.batch_size,
                                             # shuffle only the training split
                                             shuffle='train' in split,
                                             num_workers=args.threads)
        loaders.append(loader)
    #print(loaders)

    model = GNN(input_dim=loaders[0].dataset.features_dim,
                hidden_dim=args.hidden_dim,
                output_dim=loaders[0].dataset.n_classes,
                n_layers=args.n_layers,
                batchnorm_dim=args.batchnorm_dim,
                dropout_1=args.dropout_1,
                dropout_2=args.dropout_2).to(args.device)

    print('\nInitialize model')
    print(model)
    # Total number of scalar parameters that will receive gradients
    c = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('N trainable parameters:', c)

    optimizer = optim.Adam(
                filter(lambda p: p.requires_grad, model.parameters()),
                lr=args.lr,
                weight_decay=args.wdecay,
                betas=(0.5, 0.999))

    # Halve the learning rate at scheduler steps 20 and 30
    scheduler = lr_scheduler.MultiStepLR(optimizer, [20, 30], gamma=0.5)

    def train(train_loader):
        """Train ``model`` for one epoch on the batches of ``train_loader``.

        Uses the surrounding ``model``, ``optimizer``, ``scheduler``,
        ``loss_fn``, ``args`` and the current ``epoch`` (the latter only for
        the log line). Every tensor of a batch is moved to ``args.device``;
        ``data[4]`` is used as the target of the loss. Progress is printed
        every ``args.log_interval`` batches and on the last batch.

        The learning-rate scheduler is stepped once, after all batches of the
        epoch, so the MultiStepLR milestones count epochs rather than
        mini-batches.
        """
        model.train()
        start = time.time()
        train_loss, n_samples = 0, 0
        for batch_idx, data in enumerate(train_loader):
            for i in range(len(data)):
                data[i] = data[i].to(args.device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, data[4])
            loss.backward()
            optimizer.step()
            time_iter = time.time() - start
            # loss is a batch mean; weight it by the batch size for the running average
            train_loss += loss.item() * len(output)
            n_samples += len(output)
            if batch_idx % args.log_interval == 0 or batch_idx == len(train_loader) - 1:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} (avg: {:.6f}) \tsec/iter: {:.4f}'.format(
                    epoch, n_samples, len(train_loader.dataset),
                    100. * (batch_idx + 1) / len(train_loader), loss.item(), train_loss / n_samples, time_iter / (batch_idx + 1) ))
        # Once per epoch, after the optimizer updates of this epoch
        scheduler.step()

    def test(test_loader):
        """Evaluate ``model`` on ``test_loader`` and return the accuracy in percent.

        Uses the surrounding ``model``, ``loss_fn``, ``args`` and the current
        ``epoch``. For every batch the tensors are moved to ``args.device``,
        the summed loss against ``data[4]`` is accumulated and the arg-max of
        the model output along dimension 1 is taken as the prediction.

        Besides the accuracy, per-class recall, precision and F1 are printed
        and the confusion matrix is plotted through ``get_confusion_matrix`` /
        ``plot_confusion_matrix``. The number of classes is taken from the
        width of the model output rather than from a hard-coded constant.
        """
        model.eval()
        test_loss, correct, n_samples = 0, 0, 0
        preds_list = []
        data_list = []
        classnums = None
        # No gradients are needed for evaluation, so skip building the autograd graph.
        with torch.no_grad():
            for data in test_loader:
                for i in range(len(data)):
                    data[i] = data[i].to(args.device)
                output = model(data)
                loss = loss_fn(output, data[4], reduction='sum')
                test_loss += loss.item()
                n_samples += len(output)
                # Dimension 1 of the output holds one score per class.
                classnums = output.shape[1]
                pred = output.detach().cpu().max(1, keepdim=True)[1]

                data_list += data[4].tolist()
                preds_list += pred.tolist()

                correct += pred.eq(data[4].detach().cpu().view_as(pred)).sum().item()

        labels = torch.Tensor(data_list)
        preds = torch.Tensor(preds_list)
        # preds has one column per sample (keepdim=True above); give labels the same shape
        targets = labels.view_as(preds)

        test_loss /= n_samples

        acc = 100. * correct / n_samples

        # Per-class metrics, rounded to 7 decimals for printing
        r = recall(preds, targets, classnums).numpy().round(7)
        p = precision(preds, targets, classnums).numpy().round(7)
        f1 = f1_score(preds, targets, classnums).numpy().round(7)

        print('test_test_recall', " ".join('%s' % id for id in r))
        print('test_test_precision', " ".join('%s' % id for id in p))
        print('test_test_F1', " ".join('%s' % id for id in f1))

        conf_matrix = get_confusion_matrix(targets, preds)
        fig = plt.figure(figsize=(26, 26), dpi=60)
        plot_confusion_matrix(conf_matrix, classnums, epoch)
        # Release the large figure so one is not kept alive per epoch.
        plt.close(fig)

        print('Test set (epoch {}): Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(epoch, 
                                                                                              test_loss, 
                                                                                              correct, 
                                                                                              n_samples, acc))
        return acc
    ###################################################################
    def plot_confusion_matrix(conf_matrix, num_classes, epoch):
        """Draw ``conf_matrix`` on the current pyplot figure, save it and show it.

        Args:
            conf_matrix: square 2-D array indexed as ``conf_matrix[true, pred]``;
                each cell value is written into the plot.
            num_classes: number of classes; tick labels are ``0 .. num_classes-1``.
                It must equal ``len(conf_matrix)``, since one label is placed
                per row/column.
            epoch: epoch number, used only for the file name ``./fig<epoch>.png``.

        Any ``num_classes`` is accepted and the figure is saved for every epoch.
        """
        plt.imshow(conf_matrix, cmap=plt.cm.Blues)
        indices = range(len(conf_matrix))
        # Class ids are consecutive integers starting at 0.
        classes = list(range(num_classes))
        plt.xticks(indices, classes)
        plt.yticks(indices, classes)
        plt.colorbar()
        plt.xlabel('y_pred')
        plt.ylabel('y_true')
        # x is the predicted class (column), y the true class (row)
        for first_index in range(len(conf_matrix)):
            for second_index in range(len(conf_matrix[first_index])):
                plt.text(first_index, second_index, conf_matrix[second_index, first_index])
        plt.savefig('./fig{}.png'.format(epoch), format='png')
        plt.show()

    def get_confusion_matrix(label, pred):
        """Return ``confusion_matrix(label, pred)``: ``label`` is passed first, ``pred`` second."""
        return confusion_matrix(label, pred)
    ###############################################################################################
    loss_fn = F.cross_entropy
    max_acc = 0.0
    t_start = time.time()
    for epoch in range(args.epochs):
        train(loaders[0])
        acc = test(loaders[1])
        accuracy_arr[fold_id][epoch] = acc
        max_acc = max(max_acc, acc)
    print("time: {:.4f}s".format(time.time() - t_start))
    acc_folds.append(max_acc)

print(acc_folds)
#print('{}-fold cross validation avg acc (+- std): {} ({})'.format(args.n_folds, np.mean(acc_folds), np.std(acc_folds)))

# mean_validation = accuracy_arr.mean(axis=0)
# maximum_epoch = np.argmax(mean_validation)
# average = np.mean(accuracy_arr[:, maximum_epoch])
# standard_dev = np.std(accuracy_arr[:, maximum_epoch])
# print('{}-fold cross validation avg acc (+- std): {} ({})'.format(args.n_folds, average, standard_dev))