In [1]:
import os

import sys
import pprint

# Base directory under which the project checkouts live.
root = '/'
pprint.pprint(sys.path)

# Make the local `pigvae_all` and `ddpm-torch` checkouts importable, showing
# the search path after each addition.
import_path = root + 'pigvae_all'
import_path2 = root + "ddpm-torch"
for extra_path in (import_path, import_path2):
    sys.path.append(extra_path)
    pprint.pprint(sys.path)

['/opt/conda/lib/python310.zip',
 '/opt/conda/lib/python3.10',
 '/opt/conda/lib/python3.10/lib-dynload',
 '',
 '/opt/conda/lib/python3.10/site-packages']
['/opt/conda/lib/python310.zip',
 '/opt/conda/lib/python3.10',
 '/opt/conda/lib/python3.10/lib-dynload',
 '',
 '/opt/conda/lib/python3.10/site-packages',
 '/torch_cuda/pigvae_all']
['/opt/conda/lib/python310.zip',
 '/opt/conda/lib/python3.10',
 '/opt/conda/lib/python3.10/lib-dynload',
 '',
 '/opt/conda/lib/python3.10/site-packages',
 '/torch_cuda/pigvae_all',
 '/torch_cuda/ddpm-torch']


In [2]:
import numpy as np
import torch
from torch.utils.data import Dataset
from torch.utils.data.distributed import DistributedSampler
import random
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
import networkx as nx
from networkx.algorithms.shortest_paths.dense import floyd_warshall_numpy

from networkx.generators.random_graphs import *
from networkx.generators.ego import ego_graph
from networkx.generators.geometric import random_geometric_graph



In [3]:
# Feature counts used when building `hparams`: per-node and per-edge.
num_node_f, num_edge_f = 36, 6

In [4]:
# Hyper-parameters handed to the pigvae sub-modules (encoder, decoder,
# bottleneck, permuter, property predictor).
hparams = dict(
    # Loss configuration.
    vae=True,
    kld_loss_scale=0.01,
    perm_loss_scale=0.1,
    property_loss_scale=0.5,
    # Input feature sizes; the edge feature count adds two extra channels
    # on top of `num_edge_f`.
    num_node_features=num_node_f,
    num_edge_features=num_edge_f + 2,
    # Size of the latent graph embedding.
    emb_dim=50,
    # Graph encoder.
    graph_encoder_hidden_dim=256,
    graph_encoder_k_dim=64,
    graph_encoder_v_dim=64,
    graph_encoder_num_heads=16,
    graph_encoder_ppf_hidden_dim=512,
    graph_encoder_num_layers=16,
    # Graph decoder.
    graph_decoder_hidden_dim=256,
    graph_decoder_k_dim=64,
    graph_decoder_v_dim=64,
    graph_decoder_num_heads=16,
    graph_decoder_ppf_hidden_dim=512,
    graph_decoder_num_layers=16,
    graph_decoder_pos_emb_dim=64,
    # Property predictor head.
    property_predictor_hidden_dim=3,
    num_properties=1,
)

In [5]:
# Use the first CUDA GPU when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [6]:
from torch import nn
from torch.nn import Linear, LayerNorm, Dropout
from torch.nn.functional import relu, pad
from pigvae.graph_transformer import Transformer, PositionalEncoding
#from pigvae.synthetic_graphs.data import DenseGraphBatch

from pigvae.models import GraphEncoder, GraphDecoder, Permuter

In [7]:
import os
import torch
import numpy as np
from torch.optim import Adam, lr_scheduler
from ddpm_torch.utils import seed_all, infer_range
from ddpm_torch.toy import *

import torch
import torch.nn as nn

from ddpm_torch.modules import Linear, Sequential
from ddpm_torch.functions import get_timestep_embedding


# Normalisation layer class instantiated by the denoiser blocks below.
DEFAULT_NORMALIZER = nn.LayerNorm
# Shared activation module; note that it operates in place on its input.
DEFAULT_NONLINEARITY = nn.LeakyReLU(0.02, True)


class TemporalLayer(nn.Module):
    """Residual two-layer block conditioned on a timestep embedding.

    The input is normalised, activated and projected; the projected timestep
    embedding is added; the result is normalised, activated and projected
    again, and finally summed with a skip path from the input.
    """
    normalize = DEFAULT_NORMALIZER
    nonlinearity = DEFAULT_NONLINEARITY

    def __init__(self, in_features, out_features, temporal_features):
        """Create the sub-layers.

        Args:
            in_features: size of the last dimension of the input ``x``.
            out_features: size of the last dimension of the output.
            temporal_features: size of the last dimension of ``t_emb``.
        """
        super().__init__()
        self.norm1 = self.normalize(in_features)
        self.fc1 = Linear(in_features, out_features, bias=False)
        self.norm2 = self.normalize(out_features)
        self.fc2 = Linear(out_features, out_features, bias=False)
        self.enc = Linear(temporal_features, out_features)

        # The skip path needs a projection only when the width changes.
        if in_features == out_features:
            self.skip = nn.Identity()
        else:
            self.skip = Linear(in_features, out_features, bias=False)

    def forward(self, x, t_emb):
        """Return ``fc2(act(norm2(fc1(act(norm1(x))) + enc(t_emb)))) + skip(x)``."""
        hidden = self.norm1(x)
        hidden = self.nonlinearity(hidden)
        hidden = self.fc1(hidden)
        # Inject the timestep information between the two projections.
        hidden += self.enc(t_emb)
        hidden = self.norm2(hidden)
        hidden = self.nonlinearity(hidden)
        hidden = self.fc2(hidden)
        residual = self.skip(x)
        return hidden + residual


class Denoiser(nn.Module):
    """Timestep-conditioned MLP that maps a noisy latent back to latent space.

    The network is ``in_fc -> num_temporal_layers x TemporalLayer -> out_norm
    -> out_fc``; each temporal layer also receives a projected timestep
    embedding of width ``mid_features``.
    """
    normalize = DEFAULT_NORMALIZER
    nonlinearity = DEFAULT_NONLINEARITY

    def __init__(self, in_features, mid_features, num_temporal_layers):
        """Create the sub-layers.

        Args:
            in_features: width of the input (and of the output).
            mid_features: hidden width, also used for the timestep embedding.
            num_temporal_layers: number of stacked ``TemporalLayer`` blocks.
        """
        super(Denoiser, self).__init__()

        self.in_fc = Linear(in_features, mid_features, bias=False)
        # Build one independent TemporalLayer per position. The previous
        # `[TemporalLayer(...)] * n` repeated a single module instance, so all
        # "layers" silently shared the same weights.
        self.temp_fc = Sequential(*[
            TemporalLayer(mid_features, mid_features, mid_features)
            for _ in range(num_temporal_layers)
        ])
        self.out_norm = self.normalize(mid_features)
        self.out_fc = Linear(mid_features, in_features)
        self.t_proj = nn.Sequential(
            Linear(mid_features, mid_features),
            self.nonlinearity)
        self.mid_features = mid_features

    def forward(self, x, t):
        """Run the denoiser on ``x`` at timesteps ``t``.

        Args:
            x: input whose last dimension is ``in_features``.
            t: timesteps passed to ``get_timestep_embedding``
                (presumably one integer per batch element -- TODO confirm).

        Returns:
            The output of ``out_fc``, with last dimension ``in_features``.
        """
        t_emb = get_timestep_embedding(t, self.mid_features)
        t_emb = self.t_proj(t_emb)
        out = self.in_fc(x)
        # NOTE(review): relies on ddpm_torch's Sequential forwarding the
        # `t_emb` keyword to every contained layer -- confirm.
        out = self.temp_fc(out, t_emb=t_emb)
        out = self.out_fc(self.out_norm(out))
        return out

In [8]:
# Denoiser / diffusion model settings.
model_mean_type, model_var_type, loss_type = "eps", "fixed-large", "mse"

# The denoiser works directly on the latent vector.
lat_dim = in_features = 50

# A learned variance doubles the output width; otherwise it equals the input.
if model_var_type == "learned":
    out_features = 2 * in_features
else:
    out_features = in_features

mid_features = 256
num_temporal_layers = 3

In [9]:
# Noise schedule for the diffusion process.
beta_schedule = "linear"
beta_start = 0.001
beta_end = 0.2
timesteps = 500
betas = get_beta_schedule(
    beta_schedule,
    beta_start=beta_start,
    beta_end=beta_end,
    timesteps=timesteps,
)

In [10]:
from pigvae.models import GraphEncoder, GraphDecoder, Permuter 
from pigvae.models import BottleNeckEncoder, BottleNeckDecoder, PropertyPredictor

import torch
from torch.nn import Linear, LayerNorm, Dropout
from torch.nn.functional import relu, pad
from pigvae.graph_transformer import Transformer, PositionalEncoding
from pigvae.synthetic_graphs.data import DenseGraphBatch


class GraphLDA(torch.nn.Module):
    """Graph autoencoder with a diffusion denoiser attached to its latent.

    Combines the pigvae encoder / bottleneck / permuter / decoder stack with
    a ``Denoiser`` and a ``GaussianDiffusion`` process operating on the graph
    embedding produced by the bottleneck encoder.
    """

    def __init__(self, hparams):
        """Build all sub-modules.

        Args:
            hparams: configuration mapping forwarded to every pigvae
                sub-module.

        Note:
            The denoiser and diffusion settings are NOT read from ``hparams``;
            they come from the notebook-level globals ``in_features``,
            ``mid_features``, ``num_temporal_layers``, ``betas``,
            ``model_mean_type``, ``model_var_type`` and ``loss_type``, which
            must be defined before instantiation.
        """
        super().__init__()
        #self.vae = hparams["vae"]
        self.encoder = GraphEncoder(hparams)
        self.bottle_neck_encoder = BottleNeckEncoder(hparams)
        self.bottle_neck_decoder = BottleNeckDecoder(hparams)
        self.property_predictor = PropertyPredictor(hparams)
        self.permuter = Permuter(hparams)
        self.decoder = GraphDecoder(hparams)

        # NOTE(review): `in_features` presumably has to equal
        # hparams["emb_dim"] for the denoiser to accept the latent -- confirm.
        self.dense_fn = Denoiser(in_features, mid_features, num_temporal_layers)
        self.diffusion = GaussianDiffusion(betas=betas, model_mean_type=model_mean_type, model_var_type=model_var_type, loss_type=loss_type)

    def encode(self, node_features, edge_features, mask):
        """Encode a graph batch into a latent sample.

        Returns:
            ``(graph_emb, mu, logvar, node_features)`` where the first three
            come from the bottleneck encoder and ``node_features`` is the
            second output of the graph encoder.
        """
        graph_emb, node_features = self.encoder(
            node_features=node_features,
            edge_features=edge_features,
            mask=mask,
        )
        graph_emb, mu, logvar = self.bottle_neck_encoder(graph_emb)
        return  graph_emb, mu, logvar, node_features

    def decode(self, graph_emb, perm, mask=None):
        """Decode a latent embedding into graph logits and predicted properties.

        Returns:
            ``(node_logits, edge_logits, props)``.
        """
        # NOTE(review): a bare squeeze() removes every size-1 dimension, so a
        # batch of one would also lose its batch axis -- confirm callers are
        # fine with that.
        props = self.property_predictor(graph_emb).squeeze()

        # Disabled: derive the mask from the predicted number of nodes.
        # if mask is None:
        #     num_nodes = torch.round(props * STD_NUM_NODES + MEAN_NUM_NODES).long()
        #     mask = torch.arange(max(num_nodes)).type_as(num_nodes).unsqueeze(0) < num_nodes.unsqueeze(1)

        graph_emb = self.bottle_neck_decoder(graph_emb)
        node_logits, edge_logits = self.decoder(
            graph_emb=graph_emb,
            perm=perm,
            mask=mask
        )

        return node_logits, edge_logits, props

    def forward(self, node_features, edge_features, mask, training, tau):
        """Encode, run one diffusion/denoising step on the latent, and decode.

        Args:
            node_features, edge_features, mask: graph batch inputs passed to
                ``encode``.
            training: when true the permuter is run with ``hard=False``.
            tau: temperature forwarded to the permuter.

        Returns:
            ``(graph_pred, perm, graph_emb, mu, logvar, model_out, t_noise)``
            where ``graph_pred`` is the tuple returned by ``decode``,
            ``model_out`` is the denoiser output and ``t_noise`` is the noise
            that was added to the latent.
        """
        graph_emb, mu, logvar, node_features = self.encode(node_features, edge_features, mask)
        perm = self.permuter(node_features, mask=mask, hard=not training, tau=tau)

        # Forward diffusion: noise the latent at a random timestep per sample.
        z_0 = graph_emb
        B = z_0.shape[0]
        T = self.diffusion.timesteps
        # Sample timesteps on the latent's own device instead of the notebook
        # global `device`, so the module works wherever it has been moved.
        t = torch.randint(T, size=(B, ), dtype=torch.int64, device=z_0.device)
        t_noise = torch.randn_like(graph_emb)
        z_t = self.diffusion.q_sample(z_0, t, noise=t_noise)

        # Denoising: model_out is the network output, t_noise the noise that
        # was added (returned alongside it for the caller to compare).
        model_out = self.dense_fn(z_t, t)

        # The graph is decoded from the clean latent z_0, not from z_t.
        graph_pred = self.decode(z_0, perm, mask)

        return graph_pred, perm, graph_emb, mu, logvar, model_out, t_noise

class BottleNeckEncoder(torch.nn.Module):
    """Variational bottleneck: maps an embedding to ``mu``/``logvar`` and samples.

    Both heads are linear layers applied to the ReLU of the input; the
    returned sample uses the reparameterisation ``z = mu + eps * std``.
    """

    def __init__(self, hparams):
        """Read the layer sizes from ``hparams``.

        Uses ``hparams["graph_encoder_hidden_dim"]`` as the input width and
        ``hparams["emb_dim"]`` as the latent width.
        """
        super().__init__()
        self.d_in = hparams["graph_encoder_hidden_dim"]
        self.d_out = hparams["emb_dim"]
        self.wu = Linear(self.d_in, self.d_out)
        self.wv = Linear(self.d_in, self.d_out)

    def forward(self, x):
        """Return ``(z, mu, logvar)`` for the input embedding ``x``."""
        activated = relu(x)
        mu = self.wu(activated)
        logvar = self.wv(activated)

        # Reparameterisation trick: draw standard normal noise and scale it.
        std = (0.5 * logvar).exp()
        noise = torch.randn_like(std)
        return mu + noise * std, mu, logvar

In [11]:
# Build a freshly initialised GraphLDA and place it on the selected device.
model = GraphLDA(hparams)
model = model.to(device)

In [12]:
load_model_dir = root + 'save_models/qm9/pig-beta-e3diffvae_with_trained-ae_models/'

# The checkpoint holds the whole pickled module, so the class definitions above
# must already exist. `map_location` lets a checkpoint saved on a GPU load on a
# CPU-only host (or a different GPU index) instead of raising.
# NOTE(review): recent PyTorch releases may require `weights_only=False` to
# unpickle a full module -- confirm against the installed version. Only load
# checkpoints from a trusted source, since unpickling can execute code.
model = torch.load(load_model_dir + "pigvae_best_model.pt", map_location=device)
model.to(device).eval()

GraphLDA(
  (encoder): GraphEncoder(
    (posiotional_embedding): PositionalEncoding()
    (graph_transformer): Transformer(
      (self_attn_layers): ModuleList(
        (0): SelfAttention(
          (w_qs): Linear(in_features=256, out_features=1024, bias=False)
          (w_ks): Linear(in_features=256, out_features=1024, bias=False)
          (w_vs): Linear(in_features=256, out_features=1024, bias=False)
          (fc): Linear(in_features=1024, out_features=256, bias=False)
          (attention): ScaledDotProductWithEdgeAttention(
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (dropout): Dropout(p=0.1, inplace=False)
          (layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
        )
        (1): SelfAttention(
          (w_qs): Linear(in_features=256, out_features=1024, bias=False)
          (w_ks): Linear(in_features=256, out_features=1024, bias=False)
          (w_vs): Linear(in_features=256, out_features=1024, bias=False)
          

In [13]:
import os
import shutil
import argparse
import yaml
#from easydict import EasyDict
from tqdm.auto import tqdm
from glob import glob
import torch
#import torch.utils.tensorboard
from torch.nn.utils import clip_grad_norm_
from torch_geometric.data import DataLoader

# Loop

In [14]:
def load_pickes(load_dir):
    """Load the five pickled dataset components stored under ``load_dir``.

    Args:
        load_dir: path prefix that the file names are appended to by plain
            string concatenation, so a directory must include its trailing
            separator (e.g. ``"data/"``).

    Returns:
        ``(node_features, edge_features, masks, props, targets)`` read from
        ``node_features.pickle``, ``edge_features.pickle``, ``masks.pickle``,
        ``props.pickle`` and ``all_targets.pickle`` respectively.

    Raises:
        OSError: if any of the files cannot be opened.
    """
    # Imported here so the function does not rely on a later notebook cell
    # having executed `import pickle` first.
    import pickle

    file_names = (
        "node_features.pickle",
        "edge_features.pickle",
        "masks.pickle",
        "props.pickle",
        "all_targets.pickle",
    )

    loaded = []
    for file_name in file_names:
        # The `with` block closes the file; no explicit close is needed.
        # NOTE(security): pickle.load can execute arbitrary code -- only use
        # this on files from a trusted source.
        with open(load_dir + file_name, 'br') as fh:
            loaded.append(pickle.load(fh))

    node_features, edge_features, masks, props, targets = loaded
    return node_features, edge_features, masks, props, targets

In [15]:
def graphs_to_embs(dataloader):
    """Encode every batch of ``dataloader`` and collect latent statistics.

    Uses the notebook-level ``model`` and ``device``. Each batch must unpack
    into ``(node_features, edge_features, mask, props, targets)``; ``props``
    is not used.

    Args:
        dataloader: iterable of batches as described above.

    Returns:
        A tuple of three NumPy arrays:
        ``z_mus`` -- the ``mu`` returned by ``model.encode`` for every sample;
        ``z_vars`` -- for every sample, a diagonal matrix whose diagonal is
        ``exp(0.5 * logvar) ** 2``;
        ``t_list`` -- the targets, in dataloader order.
    """
    z_mus = []
    z_vars = []
    t_list = []

    # Pure inference: disabling autograd avoids building (and holding on to)
    # a graph for every batch, which is what previously made manual `del` /
    # `empty_cache` calls necessary.
    with torch.no_grad():
        for batch_idx, batch_data in enumerate(dataloader):
            node_features, edge_features, mask, _props, targets = batch_data
            node_features = node_features.to(device)
            edge_features = edge_features.to(device)
            mask = mask.to(device)

            _z, mu, logvar, _ = model.encode(node_features, edge_features, mask)

            # One diagonal covariance matrix per sample, built in a single
            # batched call instead of a Python loop over torch.diag.
            # assumes logvar is (batch, emb_dim) -- TODO confirm
            batch_std = torch.exp(0.5 * logvar)
            batch_vars = torch.diag_embed(batch_std * batch_std)

            z_vars.extend(batch_vars.cpu().numpy())
            z_mus.extend(mu.cpu().numpy())
            t_list.extend(targets.cpu().detach().numpy())

            if batch_idx % 50 == 0:
                print(batch_idx)

    return np.array(z_mus), np.array(z_vars), np.array(t_list)

In [16]:
import pickle

load_dir = root + "dataset/eval_dataset/molecular_properties/qm9/e3graphs/"
sample_save_dir = load_model_dir + "samples_for_mcmc/"

print(sample_save_dir)

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(sample_save_dir, exist_ok=True)

# Load the pre-processed graphs and iterate over them in their stored order
# (shuffle=False) so the saved arrays stay aligned with the dataset.
node_features, edge_features, masks, props, targets = load_pickes(load_dir)
dataset = torch.utils.data.TensorDataset(node_features, edge_features, masks, props, targets)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=10, shuffle=False)

z_mus, z_vars, ys = graphs_to_embs(dataloader)
print(z_mus.shape, z_vars.shape, ys.shape)

# Persist the latent means, diagonal covariance matrices and targets.
np.save(sample_save_dir + "embs_mu.npy", z_mus)
np.save(sample_save_dir + "embs_var.npy", z_vars)
np.save(sample_save_dir + "all_targets.npy", ys)

/torch_cuda/save_qm9-models/pig-beta-e3diffvae_with_trained-ae_models/samples_for_mcmc/
0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
2400
2450
2500
2550
2600
2650
2700
2750
2800
2850
2900
2950
3000
3050
3100
3150
3200
3250
3300
3350
3400
3450
3500
3550
3600
3650
3700
3750
3800
3850
3900
3950
4000
4050
4100
4150
4200
4250
4300
4350
4400
4450
4500
4550
4600
4650
4700
4750
4800
4850
4900
4950
5000
5050
5100
5150
5200
5250
5300
5350
5400
5450
5500
5550
5600
5650
5700
5750
5800
5850
5900
5950
6000
6050
6100
6150
6200
6250
6300
6350
6400
6450
6500
6550
6600
6650
6700
6750
6800
6850
6900
6950
7000
7050
7100
7150
7200
7250
7300
7350
7400
7450
7500
7550
7600
7650
7700
7750
7800
7850
7900
7950
8000
8050
8100
8150
8200
8250
8300
8350
8400
8450
8500
8550
8600
8650
8700
8750
8800
8850
8900
8950
9000
9050
9100
9150
9200
9250
9300
