In [None]:
import numpy as np
import networkx as nx
import random
import torch
from torch import nn
from torch import optim
import torch.utils.data as utils
from torchvision import datasets
from argparse import Namespace

import matplotlib
import matplotlib.pyplot as plt 

import os
import sys
sys.path.append('path')

from cvae_diffusion.model.cvae_muti import CVAE_MUTI
from cvae_diffusion.model.cvae import CVAE
from cvae_diffusion.model.score_sde import score_sde
from cvae_diffusion.model.diffusion import make_diffusion
from cvae_diffusion.model.utils import reset_weights
from cvae_diffusion.train_gada import train_gada_joint
from cvae_diffusion.train_cvae import train_cvae_independent
from cvae_diffusion.config import BIN_config_GADA

from cvae_diffusion.inv_vae import INV_VAE

# Seed every random number generator this notebook draws from, not just NumPy:
# Python's `random` is used for the train/validation split, and PyTorch drives
# DataLoader shuffling (and any torch-based sampling in the models).
seed = 666
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)


生成两组模拟数据

In [None]:
# Simulation parameters
n_nodes = 68          # number of nodes in every simulated graph
n_nets = 2_000        # total number of simulated graphs
n_group_nets = 1_000  # size of the first group; the remaining graphs form the second group

In [None]:
# Simulate the graph data for the two groups
def simulate_networks(category, seed=None):
    """Simulate ``n_nets`` weighted networks of one topology, split into two groups.

    For each graph a binary adjacency matrix ``A`` is drawn from the requested
    random-graph family, scaled by a per-graph factor ``g`` and squared:
    ``B = (g * A) @ (g * A)``, with the diagonal of ``B`` set to zero.
    ``g`` is drawn from a normal distribution with mean 0.5 (std 0.01) for the
    first ``n_group_nets`` graphs and mean 1 (std 0.01) for the remaining ones.

    Parameters
    ----------
    category : str
        Graph family: 'Erdos', 'SmallWorld', 'RandomCommunity' or 'ScaleFree'.
    seed : int or None, optional
        Seed for both the scale factors (NumPy global RNG, which is reseeded
        as a side effect) and the graph generators. With a non-None seed,
        repeated calls return identical networks.

    Returns
    -------
    B_nets : numpy.ndarray
        Stacked ``B`` matrices, one ``n_nodes x n_nodes`` matrix per graph.
    group_ids : numpy.ndarray
        Length ``n_nets`` array: 0 for the first ``n_group_nets`` graphs,
        1 for the rest.

    Raises
    ------
    ValueError
        If ``category`` is not one of the supported graph families.
    """
    # networkx generators draw from Python's `random` unless given a seed, so
    # np.random.seed() alone does not make the graphs reproducible. One shared
    # Random instance gives every graph a different but deterministic draw.
    graph_rng = random.Random(seed)

    builders = {
        # Erdos network
        'Erdos': lambda: nx.gnm_random_graph(n_nodes, 400, seed=graph_rng),
        # Small world network
        'SmallWorld': lambda: nx.watts_strogatz_graph(n_nodes, 10, 0.5, seed=graph_rng),
        # Random community network
        'RandomCommunity': lambda: nx.random_partition_graph(
            [n_nodes // 2, n_nodes // 2], 0.25, 0.01, seed=graph_rng),
        # Scale free network
        'ScaleFree': lambda: nx.barabasi_albert_graph(n_nodes, 5, seed=graph_rng),
    }
    if category not in builders:
        raise ValueError("Invalid category")
    build_graph = builders[category]

    # Group labels: 0 for the first n_group_nets graphs, 1 afterwards.
    group_ids = np.zeros(n_nets)
    group_ids[n_group_nets:] = 1

    # Per-graph scale factors; the global NumPy RNG is (re)seeded here.
    np.random.seed(seed)
    group = np.zeros(n_nets)
    group[:n_group_nets] = np.random.normal(0.5, 0.01, n_group_nets)
    group[n_group_nets:] = np.random.normal(1, 0.01, n_nets - n_group_nets)

    B_nets = []
    for i in range(n_nets):
        G = build_graph()
        A = nx.to_scipy_sparse_array(G).todense().reshape(n_nodes, n_nodes)
        scaled = group[i] * A
        B = np.matmul(scaled, scaled)
        np.fill_diagonal(B, 0)  # drop self-connections
        B_nets.append(B)

    return np.array(B_nets), np.array(group_ids)

In [None]:
# Simulate the trait y for every network
def simulate_traits(category, seed=None):
    """Simulate one noisy scalar trait per network.

    Networks are obtained from ``simulate_networks(category, seed)``. The raw
    trait of a network ``B`` is the quadratic form ``alpha @ B @ alpha`` where
    ``alpha`` is 1 on the first 17 nodes and 0 elsewhere. Raw traits are
    standardised (zero mean, unit population standard deviation) and
    standard-normal noise is added.

    Parameters
    ----------
    category : str
        Graph family forwarded to ``simulate_networks``.
    seed : int or None, optional
        Seed forwarded to ``simulate_networks`` and used for the noise
        generator.

    Returns
    -------
    numpy.ndarray
        One trait value per simulated network.
    """
    B_nets, _ = simulate_networks(category, seed)

    # Size alpha from n_nodes instead of a hard-coded 68 so it always matches
    # the network dimension.
    alpha = np.zeros(n_nodes)
    alpha[0:17] = 1

    # Trait formula: y_i = alpha^T B_i alpha
    y = np.array([np.matmul(np.matmul(alpha, B), alpha).item() for B in B_nets])

    # Standardise
    y_std = (y - np.mean(y)) / np.std(y)

    # Random noise from a dedicated generator. Reseeding the global NumPy RNG
    # with the same seed would replay the exact standard-normal draws that
    # simulate_networks used for the per-graph scale factors, making the
    # "noise" a deterministic function of those factors instead of independent.
    noise_rng = np.random.default_rng(seed)
    epsilon = noise_rng.normal(0, 1, y_std.shape)

    return y_std + epsilon

In [None]:
# Generate data, e.g. for the RandomCommunity topology
# NOTE(review): simulate_traits simulates its own networks internally, so `y`
# only corresponds to `B_nets` if simulate_networks returns the same networks
# for the same seed — confirm.
B_nets, group_ids = simulate_networks('RandomCommunity', seed)
y = simulate_traits('RandomCommunity', seed)

# Shuffle the samples with ONE shared permutation. Calling np.random.shuffle
# on each array separately applies a different permutation to each one and
# breaks the pairing between a network, its group label and its trait.
np.random.seed(seed)
perm = np.random.permutation(len(B_nets))
B_nets, group_ids, y = B_nets[perm], group_ids[perm], y[perm]
assert len(B_nets) == len(group_ids) == len(y)
print(B_nets.shape, group_ids.shape, y.shape)

Training

In [None]:
# Hyper-parameter setup
# NOTE(review): the cells in view read `config.device`, never this `device`
# variable — confirm whether `config.device = device` was intended.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
config = BIN_config_GADA()
config.sde_type = 'vesde'
config.iw_sample_p = 'drop_all_iw'
config.iw_sample_q = 'll_uniform'
config.embedding_type = 'fourier'
# Use the configured minimum variance as the initial variance.
config.sigma2_0 = config.sigma2_min

In [None]:
# Show the full configuration (defaults plus the overrides set above) so the run is self-documenting.
print(config)

In [None]:
# Build the training and validation datasets (80% / 20% split)
# Shuffle the indices with a dedicated RNG seeded from `seed`, so the split is
# reproducible and does not depend on the state of the global `random` module.
ids = list(range(n_nets))
random.Random(seed).shuffle(ids)

n_train = int(0.8 * n_nets)
train_ids = ids[:n_train]
val_ids = ids[n_train:]

# Flatten every n_nodes x n_nodes network into one float32 row vector.
tensor_nets = torch.as_tensor(B_nets, dtype=torch.float32).reshape(-1, n_nodes * n_nodes)
tensor_group = torch.from_numpy(group_ids).to(torch.float32).view(-1, 1)
tensor_trait = torch.from_numpy(y).to(torch.float32).view(-1, 1)

train_set = utils.TensorDataset(
    tensor_nets[train_ids], tensor_group[train_ids], tensor_trait[train_ids])
val_set = utils.TensorDataset(
    tensor_nets[val_ids], tensor_group[val_ids], tensor_trait[val_ids])

train_loader = utils.DataLoader(train_set, config.batch_size, shuffle=True, drop_last=True)
# NOTE(review): drop_last=True also discards the final partial validation
# batch — confirm the training code requires fixed-size batches.
val_loader = utils.DataLoader(val_set, config.batch_size, shuffle=False, drop_last=True)

In [None]:
# Build the K-nearest-neighbour masks, one per decoder layer
A_mat = np.mean(B_nets, axis=0)
A_mat = A_mat + A_mat.transpose()

# A double argsort converts each row into ranks (0 = smallest entry of the row).
row_ranks = np.argsort(np.argsort(A_mat, axis=-1), axis=-1)

# Layer `layer` keeps the entries whose rank is below n_neighbors + layer, so
# every successive layer keeps one more entry per row.
# NOTE(review): with ascending ranks this keeps the SMALLEST entries of each
# row of the mean network — confirm this is the intended neighbourhood.
knn_masks = []
for layer in range(config.n_dec_layers):
    keep = row_ranks < config.n_neighbors + layer
    knn_masks.append(torch.from_numpy(keep).float().to(config.device))

print(len(knn_masks), knn_masks[0].shape)

In [None]:
# Build the conditional VAE: instantiate it on config.device, apply
# reset_weights to its modules, and attach the KNN masks.
cvae = CVAE(config).to(config.device)
#cvae = CVAE_MUTI(config).to(config.device)
cvae.apply(reset_weights)
cvae.set_mask(knn_masks)
# Build the diffusion process and the score model
diffusion_cont = make_diffusion(config)
score = score_sde(config).to(config.device)
score.apply(reset_weights)
# Optimizers: one Adam instance per model, stored as an attribute on the model
cvae.optimizer = optim.Adam(cvae.parameters(), lr=config.lr)
score.optimizer = optim.Adam(score.parameters(), lr=config.lr)

In [None]:
# Run train_gada_joint on the score model + CVAE pair for config.n_epochs
# epochs and keep the returned training / validation losses.
train_loss_0, val_loss_0 = train_gada_joint(train_loader, val_loader, diffusion_cont, score, score.optimizer, cvae, cvae.optimizer, config.n_epochs, config)

In [None]:
# Second CVAE built from the same config and masks as `cvae`, with its own
# Adam optimizer; it is trained on its own (without the score model) below.
cvae1 = CVAE(config).to(config.device)
cvae1.apply(reset_weights)
cvae1.set_mask(knn_masks)
cvae1.optimizer = optim.Adam(cvae1.parameters(), lr=config.lr)

In [None]:
# Run train_cvae_independent on cvae1 for config.n_epochs epochs and keep the
# returned training / validation losses.
train_loss_1, val_loss_1 = train_cvae_independent(train_loader, val_loader, cvae1, cvae1.optimizer, config.device, config.n_epochs, config)

In [None]:
# NOTE(review): this import rebinds `Namespace` (imported from argparse at the
# top of the notebook) and `reset_weights` (imported from
# cvae_diffusion.model.utils) — confirm the helpers versions are the intended ones.
from cvae_diffusion.helpers import Namespace, reset_weights
# model config
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# NOTE(review): n_dec_layers / n_neighbors here are independent of the
# config.n_dec_layers / config.n_neighbors used to build knn_masks — confirm they agree.
con = Namespace(n_nodes=68, latent_dim=68, hidden_dim=1024, nuisance_dim=1,
    n_enc_layers=1, n_dec_layers=5, n_neighbors=32, drop_out=0.,
    beta = 1., gamma = .5, add_reg = True, y_dim = 1,               
    batch_size=32, n_epochs=200, lr=3e-6, device=DEVICE)

# Instantiate INV_VAE on con.device, apply reset_weights, and attach the KNN masks.
cond_model = INV_VAE(con).to(con.device)
cond_model.apply(reset_weights)
cond_model.set_mask(knn_masks)
# NOTE(review): the optimizer takes its learning rate from `config.lr`, not
# from `con.lr` (3e-6) defined above — confirm which one is intended.
optimizer = optim.Adam(cond_model.parameters(), lr=config.lr) 

In [None]:
# Train cond_model (INV_VAE), collecting what reg_train / reg_test return for each epoch.
train_loss_2 = []
val_loss_2 = []
# NOTE(review): the epoch count and device come from `config`, not from `con`
# (con.n_epochs=200, con.device), although the model was moved to con.device — confirm.
for epoch in range(config.n_epochs):
    train_losses = cond_model.reg_train(epoch, train_loader, cond_model, optimizer, config.device, n_epoch_display=5)
    val_losses = cond_model.reg_test(epoch, val_loader, cond_model, config.device, n_epoch_display=5)
    train_loss_2.append(train_losses)
    val_loss_2.append(val_losses)
    
# Convert the per-epoch histories to NumPy arrays.
train_loss_2 = np.array(train_loss_2)
val_loss_2 = np.array(val_loss_2)