<a href="https://colab.research.google.com/github/gerritgr/Alia/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# AliaMolecule Inference

#### Project Name

In [None]:
#!pip install wandb --force-reinstall

In [None]:
# Project name; used below to build the Google Drive working directory on Colab.
PROJECT_NAME = "AliaMoleculePaper2"
# Prefix for checkpoint and output file names (assigned to PATH_PATTERN further down).
PATH_PATTERN_BASE = "aliamol_paper" #aliamol2 is trained on denoised image
# When True, start_experiments() trains without a discriminator.
BASELINE = False


# When True, epoch and generation counts are divided by 10 for quick runs.
DEBUG = False
import os
# Make CUDA kernel launches synchronous so that errors are reported at the failing call.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'


#### Setup

In [None]:
# Load drive: on Google Colab, mount Drive and work inside a per-project folder.

import os
USE_COLAB = False
try:
  from google.colab import drive
  USE_COLAB = True
except ImportError:
  # Not running on Colab: stay in the current working directory.
  pass

try:
  import wandb # need to do this before changing cwd
except ImportError:
  os.system("pip install wandb")
  try:
    import wandb # retry so the module-level name exists after a successful install
  except ImportError:
    print("wandb is not available; log() will fall back to printing")


if USE_COLAB:
  if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')
  dir_path = f'/content/drive/MyDrive/colab/{PROJECT_NAME}/'
  os.makedirs(dir_path, exist_ok=True)
  print("Current Working Directory: ", os.getcwd())
  # os.getcwd() never has a trailing slash, so normalise before comparing.
  if os.getcwd() != os.path.normpath(dir_path):
    os.chdir(dir_path)
    print("New Working Directory: ", os.getcwd())
    print("New Working Directory: ", os.getcwd())

Mounted at /content/drive
Current Working Directory:  /content
New Working Directory:  /content/drive/MyDrive/colab/AliaMoleculePaper


In [None]:
# Install packages that are missing from the environment.

import os
import torch
torch_version = torch.__version__.split("+")
# The pip command below expands ${TORCH} and ${CUDA} in the shell, so both
# must be exported; CPU-only builds have no "+<cuda>" suffix in the version.
os.environ["TORCH"] = torch_version[0]
os.environ["CUDA"] = torch_version[1] if len(torch_version) > 1 else "cpu"
try:
  import torch_geometric
except ImportError:
  os.system("pip install pyg-lib torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html")
  os.system("pip install torch-geometric")

try:
  import rdkit
except ImportError:
  os.system("pip install rdkit")

# Prefix used for all checkpoint / output file names.
PATH_PATTERN = PATH_PATTERN_BASE

#### Imports

In [None]:
#%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 100 # Set this to 300 to get better image quality
from PIL import Image # We use PIL to load images
import seaborn as sns
#import imageio # to generate .gifs
import networkx as nx

# always good to have
import glob, random, os, traceback, time, copy
import pickle
import numpy as np
import networkx as nx
import math
from tqdm import tqdm
import gzip

import torch
from torch import nn
from torch.optim import Adam
from torch.nn import Linear as Lin
from torch.nn import Sequential as Seq
from torch.nn import Linear
import torch.nn.functional as F
from torch.optim import Adam

import torch_geometric
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GATv2Conv, GraphNorm, BatchNorm
from torch_geometric.utils import erdos_renyi_graph, to_networkx, from_networkx
from torch_geometric.nn import global_mean_pool, global_add_pool

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#DEVICE = torch.device('cpu')

### Load External

In [None]:
# Clone the Alia repository and copy its contents into the working directory
# unless the smiles_to_pyg package folder is already present.
if not os.path.exists("smiles_to_pyg"):
  os.system("git clone https://github.com/gerritgr/Alia.git && cp -R Alia/* .")
# NOTE(review): helpers used below without a visible definition (read_qm9,
# pyg_to_smiles) presumably come from this star import — confirm.
from smiles_to_pyg.molecule_load_and_convert import *

#### Hyperparams

In [None]:
##
## Diffusion
##
# Number of diffusion steps and the first / last value of the linearly spaced
# beta schedule (see generate_schedule).
TIMESTEPS = 1000
START = 0.0001
END = 0.015

# Training
BATCH_SIZE = 128*2
# Weight of the discriminator term in train_epoch: (1 - GAMMA) * mse + GAMMA * disc_loss.
GAMMA = 0.1

##
## Pred
##
LEARNING_RATE_GEN = 0.001
EPOCHS_GEN = 100

### PNA Pred
# Default constructor arguments of PNAnet.
DROPOUT_PRED = 0.05
DEPTH_PRED = 4
HIDDEN_CHANNELS_PRED = 32
TOWERS_PRED = 1
NORMALIZATION_PRED = True

##
## Disc
##
EPOCHS_DISC_MODEL = 70
# Scale of the Gaussian noise PNAdisc.forward adds to its input features.
DISC_NOISE=0.3

### PNA Disc
# Default constructor arguments of PNAdisc.
HIDDEN_CHANNELS_DISC = 8
DEPTH_DISC = 4
DROPOUT_DISC = 0.05
NORMALIZATION_DISC = True


##
## Molecule Encoding
##

# Each row of x has INDICATOR_FEATURE_DIM + ATOM_FEATURE_DIM + BOND_FEATURE_DIM
# = 11 columns (asserted in build_dataset and PNAdisc).
INDICATOR_FEATURE_DIM = 1
FEATURE_DIM = 5 # (has to be the same for atom and bond)
ATOM_FEATURE_DIM = FEATURE_DIM
BOND_FEATURE_DIM = FEATURE_DIM
# Boolean masks over the 11 columns: NON_NODES is True for column 0 and the
# last five columns, NON_EDGES is True for column 0 and the first five feature columns.
NON_NODES = [True] + [False]*5 + [True] * 5
NON_EDGES = [True] + [True]*5 + [False] * 5

# Number of extra input columns PNAnet appends for the time step.
TIME_FEATURE_DIM = 1

# Utils

In [None]:
def log(d):
  """Send the metrics dict ``d`` to Weights & Biases, or print it as a fallback.

  Args:
    d: Dictionary of values to log.

  The fallback triggers when wandb cannot be imported or ``wandb.log`` raises.
  Only ``Exception`` is caught so that Ctrl-C / SystemExit still propagate.
  """
  try:
    import wandb
    wandb.log(d)
  except Exception:
    print(d)

In [None]:
def load_file(filepath):
  """Return the object stored in the gzip-compressed pickle at ``filepath``.

  Any error is printed and then re-raised unchanged.
  NOTE: pickle must only be used on files written by this project.
  """
  print("try to read ", filepath)
  try:
    with gzip.open(filepath, 'rb') as stream:
      payload = pickle.load(stream)
  except Exception as err:
    print(f"An error occurred: {str(err)}")
    raise
  return payload

def write_file(filepath, data):
  """Pickle ``data`` into a gzip-compressed file at ``filepath``.

  Args:
    filepath: Destination path.
    data: Object to store. If it offers a ``.cpu()`` method (e.g. a tensor),
      the result of that call is stored instead.
  """
  try:
    data = data.cpu()
  except AttributeError:
    # Plain Python containers have no .cpu(); store them unchanged.
    pass
  print("try to write ", filepath)
  with gzip.open(filepath, 'wb') as f:
    pickle.dump(data, f)

In [None]:

def build_dataset(seed=1234):
  """Return the (train, test) split of the cleaned QM9 graphs.

  The split is read from 'dataset.pickle' when possible. Otherwise it is built
  from ``read_qm9()``: graphs with a single row are dropped, as are graphs
  whose SMILES conversion fails or contains "None". The remaining graphs are
  shuffled with ``seed``, split 80/20 and cached in 'dataset.pickle'.

  Args:
    seed: Seed of the shuffle that precedes the split.

  Returns:
    Tuple ``(dataset_train, dataset_test)`` of lists of graphs.
  """
  try:
    dataset_train, dataset_test = load_file('dataset.pickle')
    return dataset_train, dataset_test
  except Exception as e:
    print(f"Could not load dataset due to error: {str(e)}, generate it now")

  dataset = read_qm9()
  dataset_all = [g for g in dataset if g.x.shape[0] > 1]
  dataset = list()
  for g in tqdm(dataset_all):
    # Explicit check instead of `assert`, which is removed under `python -O`.
    try:
      smiles = str(pyg_to_smiles(g))
    except Exception:
      continue
    if "None" not in smiles:
      dataset.append(g)
  print("Built and clean dataset, length is ", len(dataset), "old length was", len(dataset_all))
  random.Random(seed).shuffle(dataset)
  split = int(len(dataset)*0.8 + 0.5)
  dataset_train = dataset[:split]
  dataset_test = dataset[split:]
  # Sanity check of the feature layout: indicator + atom + bond columns.
  assert(dataset_train[0].x[0,:].numel() == INDICATOR_FEATURE_DIM + ATOM_FEATURE_DIM + BOND_FEATURE_DIM)

  write_file("dataset.pickle", (dataset_train, dataset_test))
  return dataset_train, dataset_test


In [None]:
def generate_schedule(start = START, end = END, timesteps=TIMESTEPS):
  """
  Generates the linear beta schedule of the forward diffusion process.

  Args:
  start (float): The first beta value. Default is START.
  end (float): The last beta value. Default is END.
  timesteps (int): The number of timesteps to generate. Default is TIMESTEPS.

  Returns:
  torch.Tensor: 1-D tensor with `timesteps` linearly spaced beta values on
  DEVICE. Callers derive alphas (1 - betas) and their cumulative product
  themselves.
  """
  betas = torch.linspace(start, end, timesteps, device = DEVICE)
  # Check against the argument, not the global, so custom lengths are allowed.
  assert(betas.numel() == timesteps)
  return betas

In [None]:
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw

def visualize_smiles_from_file(filepath):
    """Draw up to 100 molecules from a SMILES file into a grid image.

    Reads one SMILES string per line from ``filepath``, keeps the first 100
    lines, drops those RDKit cannot parse and saves the drawings as a grid of
    10 columns to ``filepath + '.jpg'``. The image is then uploaded as a wandb
    artifact on a best-effort basis (failures are printed).

    Args:
        filepath: Path of the text file with one SMILES string per line.
    """
    # Read SMILES from file
    with open(filepath, 'r') as file:
        smiles_list = [line.strip() for line in file]

    # Convert SMILES to RDKit Mol objects, filtering out invalid ones
    mols = [Chem.MolFromSmiles(smile) for smile in smiles_list[:100]]
    mols = [mol for mol in mols if mol is not None]

    num_mols = len(mols)
    if num_mols == 0:
        # plt.subplots cannot create a grid with zero rows.
        print(f"no valid SMILES in {filepath}, nothing to draw")
        return

    # Determine grid size
    cols = 10
    rows = min(10, -(-num_mols // cols))  # ceil division

    # squeeze=False keeps axs two-dimensional even when there is a single row.
    fig, axs = plt.subplots(rows, cols, figsize=(20, 20), squeeze=False,
                            gridspec_kw={'wspace': 0.3, 'hspace': 0.3})

    # axs.flat iterates row by row, i.e. idx == row * cols + col.
    for idx, ax in enumerate(axs.flat):
        ax.axis("off")  # hide axis, also for unused cells
        if idx < num_mols:
            img = Draw.MolToImage(mols[idx], size=(200, 200))
            ax.imshow(img)

    # Save the figure
    plt.savefig(filepath + '.jpg', format='jpg', bbox_inches='tight')
    plt.close(fig)  # Close the figure after saving to free up memory
    try:
        time.sleep(0.01)
        wandb.log_artifact(filepath + '.jpg', name=f"jpg_{SWEEP_ID}_{filepath.replace('.','')}", type="smiles_grid_graph")
    except Exception as e:
        print(e)

# Example usage:
# Replace YOUR_FILE_PATH with the path to your SMILES file.
# visualize_smiles_from_file(YOUR_FILE_PATH)


# Base Model

In [None]:
from torch_geometric.nn import PNA
from torch_geometric.utils import degree


def dataset_to_degree_bin(train_dataset):
  """Return the in-degree histogram that the PNA layers take as ``deg``.

  The histogram is read from 'deg.pickle' when possible; otherwise it is
  computed from ``train_dataset`` (which must then not be None), written to
  'deg.pickle' and returned as a long tensor on DEVICE.
  """
  try:
    cached = load_file('deg.pickle')
    cached = cached.to(DEVICE)
    return cached
  except Exception as e:
    print(f"Could not find degree bin due to error: {str(e)}, generate it now")
  assert(train_dataset is not None)

  def in_degrees(data):
    # In-degree of every node of one graph, computed on DEVICE.
    data = data.to(DEVICE)
    return degree(data.edge_index[1], num_nodes=data.num_nodes, dtype=torch.long)

  # First pass: largest in-degree, which fixes the histogram length.
  max_degree = max((int(in_degrees(data).max()) for data in train_dataset), default=-1)

  # Second pass: accumulate the histogram.
  histogram = torch.zeros(max_degree + 1, dtype=torch.long, device=DEVICE)
  for data in train_dataset:
    histogram += torch.bincount(in_degrees(data), minlength=histogram.numel())

  write_file("deg.pickle", histogram.cpu())
  return histogram





class PNAnet(torch.nn.Module):
  """PNA network followed by an MLP that maps noisy features to FEATURE_DIM outputs per row."""

  def __init__(self, train_dataset=None, hidden_channels=HIDDEN_CHANNELS_PRED, depth=DEPTH_PRED, dropout=DROPOUT_PRED, towers=TOWERS_PRED, normalization=NORMALIZATION_PRED, pre_post_layers=1):
    """Build the network.

    ``train_dataset`` is only needed when the degree histogram is not cached
    (see dataset_to_degree_bin). The remaining arguments are passed to PNA.
    """
    super().__init__()
    self.sigmoid = nn.Sigmoid()

    # Next multiple of `towers` strictly above hidden_channels.
    hidden_channels = towers * ((hidden_channels // towers) + 1) #tod fix

    # INDICATOR_FEATURE_DIM entries are noise free
    in_channels = INDICATOR_FEATURE_DIM + ATOM_FEATURE_DIM + BOND_FEATURE_DIM + TIME_FEATURE_DIM
    out_channels = FEATURE_DIM

    deg = dataset_to_degree_bin(train_dataset)
    self.normalization = BatchNorm(hidden_channels) if normalization else None
    self.pnanet = PNA(
      in_channels=in_channels,
      hidden_channels=hidden_channels,
      out_channels=hidden_channels,
      num_layers=depth,
      aggregators=['mean', 'min', 'max', 'std'],
      scalers=['identity', 'amplification', 'attenuation'],
      deg=deg,
      dropout=dropout,
      towers=towers,
      norm=self.normalization,
      pre_layers=pre_post_layers,
      post_layers=pre_post_layers,
    )

    self.final_mlp = Seq(
      Lin(hidden_channels, hidden_channels), nn.ReLU(),
      Lin(hidden_channels, hidden_channels), nn.ReLU(),
      Lin(hidden_channels, out_channels),
    )

  def forward(self, x_in, t, edge_index):
    """Append the time column ``t`` to ``x_in`` and return one FEATURE_DIM row per input row."""
    num_rows = x_in.shape[0]
    time_column = t.view(-1,TIME_FEATURE_DIM)
    hidden = self.pnanet(torch.concat((x_in, time_column), dim=1), edge_index)
    out = self.final_mlp(hidden)
    assert(out.numel() > 1 )
    assert(out.shape[0] == num_rows)
    return out


#model = PNAnet([data])

#model(data.x, data.edge_index, torch.ones(data.x.shape[0]))

In [None]:
#path_pattern = "aliamol_model_epoch_*.pth"
#sorted(glob.glob(path_pattern))

In [None]:
def load_latest_checkpoint(model, optimizer, loss_list, epoch_i, path_pattern=None):
  """Restore model and optimizer from the newest checkpoint matching a glob pattern.

  Args:
    model: Module whose state dict is restored.
    optimizer: Optimizer whose state dict is restored.
    loss_list: Value returned unchanged when nothing is loaded.
    epoch_i: Value returned unchanged when nothing is loaded.
    path_pattern: Glob pattern of checkpoint files; defaults to
      PATH_PATTERN + "_model_epoch_*.pth". The lexicographically last match is used.

  Returns:
    Tuple ``(model, optimizer, loss_list, epoch_i)``; the last two come from
    the checkpoint when loading succeeded.

  Loading stays best-effort, but a failure is now reported instead of being
  silently ignored.
  """
  if path_pattern is None:
    path_pattern = PATH_PATTERN + "_model_epoch_*.pth"

  checkpoint_paths = sorted(glob.glob(path_pattern))
  if not checkpoint_paths:
    return model, optimizer, loss_list, epoch_i

  latest_checkpoint_path = checkpoint_paths[-1]
  try:
    checkpoint = torch.load(latest_checkpoint_path, map_location=DEVICE)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch_i = checkpoint['epoch']
    loss_list = checkpoint['loss_list']
    print(f"read checkpoint of epoch {epoch_i:08} from disc.")
  except Exception as e:
    print(f"could not read checkpoint {latest_checkpoint_path}: {e}")

  return model, optimizer, loss_list, epoch_i

def save_model(model, optimizer, loss_list, epoch_i, upload=False):
  """Write a checkpoint for ``epoch_i`` and optionally upload it to wandb.

  Nothing is written for ``epoch_i == 0``. The checkpoint holds the epoch, the
  loss list and the state dicts of model and optimizer. Upload errors are
  printed, not raised.
  """
  if epoch_i == 0:
    return

  save_path = f"{PATH_PATTERN}_model_epoch_{epoch_i:08}.pth"
  state = {
    'epoch': epoch_i,
    'loss_list': loss_list,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
  }
  torch.save(state, save_path)

  if not upload:
    return
  try:
    wandb.log_artifact(save_path, name=f"src_txt_{SWEEP_ID}_{epoch_i:08}_weightfile", type="weight")
  except Exception as err:
    print(err)


In [None]:
def load_base_model(dataset_train, path_pattern=None):
  """Create a PNAnet on DEVICE and restore the newest checkpoint matching ``path_pattern``.

  Only the model is returned; the optimizer is created because
  load_latest_checkpoint requires one.
  """
  net = PNAnet(dataset_train).to(DEVICE)
  opt = Adam(net.parameters(), lr = LEARNING_RATE_GEN)
  net, _, _, _ = load_latest_checkpoint(net, opt, None, epoch_i=0, path_pattern=path_pattern)
  return net

# Inference

In [None]:
def denoise_one_step_wild(model, g, i):
  """One reverse step that predicts the clean features and re-noises them.

  The model output is treated as the prediction of the noise-free features. For
  t - 1 > 0 this prediction is diffused forward to step t - 1 with
  forward_diffusion; otherwise it is used directly.

  Args:
    model: Network called as ``model(x, t, edge_index)``.
    g: Graph batch with ``x``, ``edge_index`` and ``x_old`` (the features
      before noising, used to build the mask).
    i: Inference step counter; ``i = 0`` corresponds to t = TIMESTEPS - 1.

  Returns:
    Copy of ``g.x`` in which the masked entries are replaced.
  """
  t = TIMESTEPS - i - 1 # i=0 is full noise
  row_num = g.x.shape[0]

  # Column 0 is never overwritten; other entries are selected where x_old > -0.5.
  first_column = torch.zeros((g.x_old.shape[0], 1), dtype=torch.bool, device=DEVICE)
  mask = torch.concat((first_column, g.x_old[:,1:]>-0.5), dim=1)
  future_t = torch.full((row_num, 1), float(t), device=DEVICE)

  denoised_x = g.x.clone()
  original_pred = model(g.x, future_t, g.edge_index)

  if t-1>0:
    x_with_noise_again, _ = forward_diffusion(original_pred, t-1)
    denoised_x[mask] = x_with_noise_again.flatten()
  else:
    denoised_x[mask] = original_pred.flatten()
  return denoised_x



  #x_in = g.x[mask].flatten()
  #original_pred = get_pred_from_noise(noise_pred, x_in, future_t)
  ##original_pred = (x_in - torch.sqrt(1. - alphas_cumprod_t) * noise_pred)/torch.sqrt(alphas_cumprod_t)
  #assert(original_pred.shape[0] = x_in.shape[0])
  #x = g.x.clone()
  #x[mask] = original_pred
  #if t-1 <= 0:
  #  return x
  #x_with_noise_again, _ = forward_diffusion(x, t-1)
  #denoised_x[mask] = x_with_noise_again[mask]
  #return denoised_x


In [None]:
def denoise_one_step(model, g, i):
  """One reverse diffusion step using the posterior mean and variance.

  The model predicts the noise-free features; get_noise_from_pred turns that
  into a noise estimate, from which the mean of the previous step is computed.
  For t != 0, Gaussian noise scaled by the posterior standard deviation is added.

  Returns a copy of ``g.x`` with the masked entries replaced.
  """
  num_rows = g.x.shape[0]
  t = TIMESTEPS - i - 1 # i=0 is full noise

  # Quantities derived from the beta schedule.
  betas = generate_schedule()
  alphas = 1. - betas
  alpha_bars = torch.cumprod(alphas, axis=0)
  alpha_bars_prev = F.pad(alpha_bars[:-1], (1, 0), value=1.0)
  beta_t = betas[t]
  sqrt_one_minus_alpha_bar_t = torch.sqrt(1. - alpha_bars[t])
  sqrt_recip_alpha_t = torch.sqrt(1.0 / alphas[t])

  # Column 0 is never overwritten; other entries are selected where x_old > -0.5.
  first_column = torch.tensor([False]*g.x_old.shape[0], device=DEVICE).view(-1,1)
  mask = torch.concat((first_column, g.x_old[:,1:]>-0.5), dim=1)

  t_feature = torch.tensor([float(t)] * num_rows, device=DEVICE).view(-1,1)
  original_pred = model(g.x, t_feature, g.edge_index)

  values_now = g.x[mask].view(num_rows, -1)
  t_index = torch.tensor([int(t)] * num_rows, device=DEVICE).view(-1)
  noise_pred = get_noise_from_pred(original_pred, values_now, t_index)
  values_endpoint = noise_pred.view(num_rows, -1)

  assert(values_now.shape == values_endpoint.shape)

  model_mean = sqrt_recip_alpha_t * (values_now - beta_t * values_endpoint / sqrt_one_minus_alpha_bar_t)
  if t == 0:
    values_one_step_denoised = model_mean
  else:
    # sigma^2 is a variance, hence the square root below
    posterior_variance = betas * (1. - alpha_bars_prev) / (1. - alpha_bars)
    posterior_std_t = torch.sqrt(posterior_variance[t])
    noise = torch.randn_like(values_now, device = DEVICE)
    values_one_step_denoised = model_mean + posterior_std_t * noise

  denoised_x = g.x.clone()
  denoised_x[mask] = values_one_step_denoised.flatten()
  return denoised_x


In [None]:
def overwrite_with_noise(g):
  """Keep the current features in ``g.x_old`` and fill the masked entries of ``g.x`` with standard normal noise.

  The mask never selects column 0; other entries are selected where the
  original value is > -0.5. ``g`` is modified in place and returned.
  """
  g.x_old = g.x.clone()
  first_column = torch.tensor([False]*g.x_old.shape[0], device=DEVICE).view(-1,1)
  selected = torch.concat((first_column, g.x_old[:,1:]>-0.5), dim=1)
  g.x[selected] = torch.randn_like(g.x[selected])
  return g


In [None]:
@torch.inference_mode()
def generate_examples(model, dataset_train, num=100,wild=False):
  """Generate ``num`` graphs by denoising randomly chosen training graphs.

  ``num`` graphs are drawn (with replacement) from ``dataset_train``, batched
  into one batch, their masked features replaced by noise and then denoised
  for TIMESTEPS steps.

  Args:
    model: Denoising network; switched to eval mode.
    dataset_train: Sequence of graphs that provide the structure.
    num: Number of graphs to generate.
    wild: Use denoise_one_step_wild instead of denoise_one_step.

  Returns:
    List of ``num`` graphs on the CPU (empty list for ``num <= 0``).
  """
  # Setup
  print("generate samples batched")
  model.eval()
  if num <= 0:
    return []
  start_graphs = [
    dataset_train[random.randrange(len(dataset_train))].clone().to(DEVICE)
    for _ in range(num)
  ]
  # batch_size == num, so the loader yields exactly one batch.
  dataloader = DataLoader(start_graphs, batch_size = num)
  denoise = denoise_one_step_wild if wild else denoise_one_step

  # Inference
  g = next(iter(dataloader)).to(DEVICE)
  print("load g", g, g.batch)
  g = overwrite_with_noise(g)
  for i in tqdm(range(TIMESTEPS)):
    g.x = denoise(model, g, i)

  graph_list = [graph.cpu() for graph in g.to_data_list()]

  print("generated graphs ", graph_list[:10])
  return graph_list


#### Frac Correct

In [None]:
def find_frac_correct(graphs):
  """Compute the fraction of graphs that convert to a valid, connected molecule.

  A graph counts as correct when pyg_to_smiles returns a SMILES string without
  '.' that RDKit can parse.

  Args:
    graphs: Sequence of graphs to check.

  Returns:
    Tuple ``(frac_correct, smiles_list, unique_frac)`` where ``smiles_list``
    holds ``(smiles, index)`` pairs of the correct graphs and ``unique_frac``
    is the number of distinct correct SMILES strings divided by ``len(graphs)``.
    For empty input the result is ``(0.0, [], 0.0)``.
  """
  if len(graphs) == 0:
    return 0.0, [], 0.0

  smiles_list = list()
  for i, g in enumerate(tqdm(graphs)):
    smiles = pyg_to_smiles(g)
    if smiles is None or '.' in smiles:
      continue
    if Chem.MolFromSmiles(smiles) is not None:
      smiles_list.append((smiles, i))

  frac_correct = len(smiles_list)/len(graphs)
  # Deduplicate on the SMILES string only; the (smiles, index) pairs are
  # always distinct because the index is.
  unique_frac = len({smiles for smiles, _ in smiles_list})/len(graphs)
  return frac_correct, smiles_list, unique_frac

### Gen many graphs

In [None]:
#!ls aliamol2*

In [None]:
def gen_graphs(num_per_generation=1000, num_generations=40, wild=False, path_pattern=None):
  """Generate (or load from cache) graphs with the newest checkpoint.

  The result is cached next to the checkpoint file; partial results are
  written every five generations and picked up again on the next call.

  Args:
    num_per_generation: Number of graphs per call of generate_examples.
    num_generations: Number of generations (divided by 10 when DEBUG is set).
    wild: Passed on to generate_examples.
    path_pattern: Glob pattern of checkpoint files; defaults to
      PATH_PATTERN + "_model_epoch_*.pth". The lexicographically last match is used.

  Returns:
    List with ``num_per_generation * num_generations`` generated graphs.
  """
  if DEBUG:
    num_generations = int(num_generations/10)
  if path_pattern is None:
    path_pattern = PATH_PATTERN+"_model_epoch_*.pth" #"aliamol_model_epoch_*.pth"
  path = sorted(glob.glob(path_pattern))[-1]
  num_samples = num_per_generation*num_generations
  filepath = path.replace(".pth", f'_{num_samples:06d}_w{wild}_generated.pickle')

  results = list()
  try:
    results = load_file(filepath)
  except Exception:
    # No (readable) cache yet; start from scratch.
    pass

  if len(results) == num_samples:
    return results

  dataset_base, dataset_base_test = build_dataset()
  model_base = load_base_model(dataset_base, path_pattern = path)

  i = 0
  while len(results) < num_samples:
    i += 1
    # Never generate more than what is still missing.
    num = min(num_per_generation, num_samples - len(results))
    graphs = generate_examples(model_base, dataset_base, num=num, wild=wild)
    results = results + graphs
    if i % 5 == 0 or len(results) >= num_samples:
      write_file(filepath, results)

  assert(len(results) == num_samples)
  return results



def test_graph_generation(path_pattern=None, wild=False):
  """Generate graphs with gen_graphs and return the result of find_frac_correct on them."""
  graphs = gen_graphs(wild=wild, path_pattern=path_pattern)
  frac_correct, smiles_list, unique_frac = find_frac_correct(graphs)
  return frac_correct, smiles_list, unique_frac

In [None]:
#test_graph_generation(path_pattern="aliamol_model_epoch_00003901.pth")

# Discriminator

In [None]:
from torch_geometric.nn import PNA


class PNAdisc(torch.nn.Module):
  """PNA-based discriminator that outputs one value in (0, 1) per graph."""

  def __init__(self, train_dataset=None, hidden_channels=HIDDEN_CHANNELS_DISC, depth=DEPTH_DISC, dropout=DROPOUT_DISC, towers=1, normalization=NORMALIZATION_DISC, pre_post_layers=1):
    """Build the network; ``train_dataset`` is only needed when the degree histogram is not cached."""
    super().__init__()
    self.sigmoid = nn.Sigmoid()

    # Next multiple of `towers` strictly above hidden_channels.
    hidden_channels = towers * ((hidden_channels // towers) + 1)

    in_channels = INDICATOR_FEATURE_DIM + ATOM_FEATURE_DIM + BOND_FEATURE_DIM
    assert in_channels == 11
    deg = dataset_to_degree_bin(train_dataset).to(DEVICE)
    self.normalization = BatchNorm(hidden_channels) if normalization else None
    self.pnanet = PNA(
      in_channels=in_channels,
      hidden_channels=hidden_channels,
      out_channels=1,
      num_layers=depth,
      aggregators=['mean', 'min', 'max', 'std'],
      scalers=['identity', 'amplification', 'attenuation'],
      deg=deg,
      dropout=dropout,
      towers=towers,
      norm=self.normalization,
      pre_layers=pre_post_layers,
      post_layers=pre_post_layers,
    )

  def forward(self, x, edge_index, batch=None):
    """Add Gaussian noise scaled by DISC_NOISE to ``x``, score every row, mean-pool per graph and apply a sigmoid."""
    noisy_x = x + torch.randn_like(x)*DISC_NOISE
    row_scores = self.pnanet(noisy_x, edge_index)
    graph_scores = global_mean_pool(row_scores, batch)
    return self.sigmoid(graph_scores)

In [None]:
def train_epoch_disc(model_disc, dataloader, optimizer):
  """Train the discriminator for one pass over ``dataloader``.

  Returns the mean binary cross-entropy loss over batches, the mean accuracy
  over graphs (a prediction is correct when it is within 0.5 of the label)
  and the elapsed time in seconds.
  """
  model_disc.train()
  started = time.time()
  batch_losses = []
  hits = []
  for batch in dataloader:
    batch = batch.to(DEVICE)
    optimizer.zero_grad()
    pred = model_disc(batch.x, batch.edge_index, batch.batch)
    scores = pred.flatten()
    labels = batch.y.flatten()
    loss = F.binary_cross_entropy(scores, labels)
    loss.backward()
    optimizer.step()

    correct = (torch.abs(scores-labels) < 0.5).float()
    hits.extend(correct.detach().cpu().tolist())
    batch_losses.append(loss.item())

  return np.mean(batch_losses), np.mean(hits), time.time()-started

In [None]:
def test_disc(model_disc, dataloader):
  """Evaluate the discriminator on ``dataloader`` without updating it.

  Returns the mean binary cross-entropy loss over batches, the mean accuracy
  over graphs (a prediction is correct when it is within 0.5 of the label)
  and the elapsed time in seconds.
  """
  model_disc.eval()
  start_time = time.time()
  loss_list = list()
  acc_list = list()
  # No gradients are needed for evaluation; skip building the autograd graph.
  with torch.no_grad():
    for batch in dataloader:
      batch = batch.to(DEVICE)
      pred = model_disc(batch.x, batch.edge_index, batch.batch)
      loss = F.binary_cross_entropy(pred.flatten(), batch.y.flatten())
      acc = (torch.abs(pred.flatten()-batch.y.flatten()) < 0.5).float()
      acc_list.extend(acc.cpu().tolist())
      loss_list.append(loss.item())

  return np.mean(loss_list), np.mean(acc_list), time.time()-start_time

In [None]:
def train_disc_model(dataloader_disc, dataloader_disc_test, round_i):
  """Return the discriminator of round ``round_i``, training it if no weight file can be loaded.

  Weights are stored in 'discriminator_model_<round>.pth'. During training the
  test loss is evaluated every tenth epoch and in the last epoch, and the loss
  curves are saved to 'discriminator_model_<round>.png'.

  Args:
    dataloader_disc: Training loader; also passed to PNAdisc for the degree histogram.
    dataloader_disc_test: Loader used for the periodic evaluation.
    round_i: Round number used in the file names.
  """
  model_disc = PNAdisc(dataloader_disc)
  model_disc = model_disc.to(DEVICE)
  weight_path = f"discriminator_model_{round_i:05}.pth"

  if os.path.exists(weight_path):
    try:
      # map_location lets weights saved on another device be loaded here.
      checkpoint = torch.load(weight_path, map_location=DEVICE)
      model_disc.load_state_dict(checkpoint['model_state_dict'])
      print(f"found disc model in round {round_i:05}")
      return model_disc
    except Exception as e:
      print(f"could not load {weight_path} ({e}), train discriminator instead")

  epochs = list()
  losses_train = list()
  losses_test = list()

  optimizer_disc = Adam(model_disc.parameters(), lr = 0.0001)
  for epoch_i in range(EPOCHS_DISC_MODEL):
    loss_train, acc_train, t_train = train_epoch_disc(model_disc, dataloader_disc, optimizer_disc)
    if epoch_i % 10 == 1 or epoch_i == EPOCHS_DISC_MODEL-1:
      loss_test, acc_test, t_test = test_disc(model_disc, dataloader_disc_test)
      print(f"train discriminator: epoch: {epoch_i:05}, loss: {loss_train:02.4f}, loss test: {loss_test:02.4f}, acc: {acc_train:01.3f}, acc test: {acc_test:01.3f}, time: {t_train:01.3f}")
      epochs.append(epoch_i)
      losses_train.append(loss_train)
      losses_test.append(loss_test)
      plt.clf()
      plt.plot(epochs, losses_train, label='train')
      plt.plot(epochs, losses_test, label='test')
      plt.legend()
      plt.savefig(f"discriminator_model_{round_i:05}.png")

  torch.save({'model_state_dict': model_disc.state_dict(), 'epochs': epochs, "losses_train": losses_train, "losses_test": losses_test}, weight_path)
  return model_disc


In [None]:
def run_disc(round_i=1):
  """Build a real-vs-generated dataset and return the discriminator trained on it.

  Generated graphs (wild inference) get label 0.0, an equally sized random
  sample of training graphs gets label 1.0. The shuffled data is split 80/20
  into train and test loaders.
  """
  fake_graphs = gen_graphs(wild=True)
  dataset_base, _ = build_dataset()
  real_graphs = random.sample(dataset_base, len(fake_graphs))

  def with_label(graph, label):
    # Copy the graph so the cached / training graphs stay untouched.
    labelled = graph.clone()
    labelled.y = torch.tensor(label)
    return labelled

  dataset = [with_label(g, 0.0) for g in fake_graphs]
  dataset += [with_label(g, 1.0) for g in real_graphs]

  random.shuffle(dataset)
  cut_off = int(len(dataset) * 0.8)
  train_part, test_part = dataset[:cut_off], dataset[cut_off:]
  dataloader_train = DataLoader(train_part, batch_size = BATCH_SIZE, shuffle=True)
  dataloader_test = DataLoader(test_part, batch_size = BATCH_SIZE, shuffle=True)

  return train_disc_model(dataloader_train, dataloader_test, round_i)


In [None]:
#model_disc = run_disc() #0000390 is the last good one

# Forward Diffusion

In [None]:
def forward_diffusion(node_features, future_t):
  """
  Performs a forward diffusion process on a node_features tensor.
  Each row can theoretically have its own future time point.
  Implements the second equation from https://youtu.be/a4Yfz2FxXiY?t=649

  Args:
    node_features: 2-D tensor with one row per node/edge entry.
    future_t: Either a Python int/float (used for every row) or an integer
      tensor with one time index per row.

  Returns:
    Tuple ``(noisy_node_features, noise)`` with the shape of ``node_features``;
    ``noise`` is the standard normal sample that was mixed in.
  """
  row_num = node_features.shape[0]

  if isinstance(future_t, (int, float)):
    future_t = torch.tensor([int(future_t)] * row_num).to(DEVICE)

  future_t = future_t.view(-1)
  assert(row_num == future_t.numel())
  if row_num > 1:
    # Sanity check only: the first two rows are assumed to belong to the same graph.
    assert(future_t[0] == future_t[1])

  betas = generate_schedule()

  noise = torch.randn_like(node_features, device=DEVICE)
  alphas = 1. - betas
  alphas_cumprod = torch.cumprod(alphas, axis=0)
  alphabar_t = torch.gather(alphas_cumprod, 0, future_t).view(row_num, 1)
  assert(alphabar_t.numel() == row_num)

  # alphabar_t is a column vector, so both products broadcast over the feature columns.
  new_node_features_mean = torch.sqrt(alphabar_t) * node_features
  assert(new_node_features_mean.shape == node_features.shape)
  new_node_features_std = torch.sqrt(1.-alphabar_t)
  noisy_node_features = new_node_features_mean + new_node_features_std * noise
  assert(noisy_node_features.shape == node_features.shape)

  return noisy_node_features, noise

# Smoke test of forward_diffusion on a 3x1 tensor: first with per-row time steps (0, 0, 999), then with 999 for every row.
forward_diffusion(torch.tensor([1,2,3.], device=DEVICE).view(3,1), torch.tensor([0,0,999], device=DEVICE)), print(""), forward_diffusion(torch.tensor([1,2,3.], device=DEVICE).view(3,1), torch.tensor([999,999,999], device=DEVICE))




((tensor([[0.9948],
          [1.9985],
          [0.1775]], device='cuda:0'),
  tensor([[-0.5115],
          [-0.1381],
          [ 0.1100]], device='cuda:0')),
 None,
 (tensor([[ 0.7344],
          [-0.3833],
          [-1.2735]], device='cuda:0'),
  tensor([[ 0.7121],
          [-0.4284],
          [-1.3413]], device='cuda:0')))

# Train Jointly

In [None]:
def get_pred_from_noise(noise_pred, x_with_noise, future_t):
  """Recover the noise-free features from noisy features and a noise estimate.

  Computes (x_with_noise - sqrt(1 - alphabar_t) * noise_pred) / sqrt(alphabar_t),
  where alphabar_t is looked up per row via the time indices ``future_t``.
  """
  num_rows = x_with_noise.shape[0]
  alphas = 1. - generate_schedule()
  alpha_bars = torch.cumprod(alphas, axis=0)
  alphabar_t = torch.gather(alpha_bars, 0, future_t).view(num_rows, 1)

  noise_scale = torch.sqrt(1.0-alphabar_t)
  signal = x_with_noise - noise_scale*noise_pred
  return signal/torch.sqrt(alphabar_t)


def get_noise_from_pred(original_pred, x_with_noise, future_t):
  """Recover the noise from noisy features and a prediction of the noise-free features.

  Computes (x_with_noise - sqrt(alphabar_t) * original_pred) / sqrt(1 - alphabar_t),
  where alphabar_t is looked up per row via the time indices ``future_t``.
  """
  num_rows = x_with_noise.shape[0]
  alphas = 1. - generate_schedule()
  alpha_bars = torch.cumprod(alphas, axis=0)
  alphabar_t = torch.gather(alpha_bars, 0, future_t).view(num_rows, 1)

  signal_scale = torch.sqrt(alphabar_t)
  residual = x_with_noise - signal_scale*original_pred
  return residual / torch.sqrt(1.0-alphabar_t)

In [None]:
def train_epoch(model, dataloader, optimizer, model_disc=None):
  """Train the denoising model for one pass over ``dataloader``.

  For every batch one time step per graph is drawn, the masked features are
  diffused forward to that step and the model is trained to predict the
  noise-free features (MSE). With a discriminator, the loss becomes
  ``(1 - GAMMA) * mse + GAMMA * mean((1 - D(x_pred))**2)``.

  Args:
    model: Network called as ``model(x, t, edge_index)``.
    dataloader: Loader of graph batches; batches with fewer than two rows are skipped.
    optimizer: Optimizer of ``model``.
    model_disc: Optional discriminator called as ``model_disc(x, edge_index, batch=...)``.

  Returns:
    Tuple of mean loss, mean discriminator loss (0.0 per batch without a
    discriminator) and elapsed time in seconds.
  """
  model.train()
  start_time = time.time()
  loss_list = list()
  disc_loss_list = list()

  for batch in tqdm(dataloader):
    if batch.x.shape[0] < 2:
      continue
    optimizer.zero_grad()
    batch = batch.to(DEVICE)
    row_num = batch.x.shape[0]

    # One random time step per graph, expanded to one entry per row.
    num_graphs_in_batch = int(torch.max(batch.batch).item()+1)
    future_t_select = torch.randint(0, TIMESTEPS, (num_graphs_in_batch,), device = DEVICE)
    future_t = torch.gather(future_t_select, 0, batch.batch)
    assert(future_t.numel() == row_num)

    # Column 0 is never noised; the mask only works on the original values.
    mask = torch.concat((torch.tensor([False]*row_num, device=DEVICE).view(-1,1), batch.x[:,1:]>-0.5), dim=1)
    x_start_gt = batch.x[mask].view(row_num, FEATURE_DIM)
    x_with_noise, noise_gt = forward_diffusion(x_start_gt, future_t)

    x_in = batch.x.clone()
    x_in[mask] = x_with_noise.flatten()
    x_start_pred = model(x_in, future_t, batch.edge_index)
    loss = F.mse_loss(x_start_pred, x_start_gt)

    disc_loss = torch.tensor(0.0, device=DEVICE)
    if model_disc is not None:
      # Feed the predicted clean features to the discriminator.
      x_in[mask] = x_start_pred.flatten()
      disc_loss = torch.mean((1.0- model_disc(x_in, batch.edge_index, batch=batch.batch))**2)
      loss = (1.0 - GAMMA) * loss + GAMMA*disc_loss

    loss.backward()
    loss_list.append(loss.item())
    disc_loss_list.append(disc_loss.item())
    optimizer.step()

  return np.mean(loss_list),np.mean(disc_loss_list), time.time()-start_time

In [None]:
def log_smiles(smiles, filename):
  """Write ``smiles`` to ``filename`` (one ``str(item)`` per line), upload the file and draw it.

  All three steps are best-effort: errors are printed, not raised. Upload and
  drawing are independent, so a failed wandb upload no longer prevents the
  molecule grid from being rendered.

  NOTE(review): callers in this file pass ``(smiles, index)`` tuples, so each
  line holds the tuple's repr — confirm that visualize_smiles_from_file is
  meant to parse that.
  """
  try:
    with open(filename, "w") as file:
      for string in smiles:
        file.write(str(string) + "\n")
  except Exception as e:
    # Without the file neither upload nor drawing can work.
    print(e)
    return

  try:
    wandb.log_artifact(filename, name=f"src_txt_{SWEEP_ID}_{filename}", type="smiles")
  except Exception as e:
    print(e)

  try:
    time.sleep(0.01)
    visualize_smiles_from_file(filename)
  except Exception as e:
    print(e)

In [None]:
def train_base_model(train_loader, epoch_num=EPOCHS_GEN, model_disc=None):
  """Train (or continue training) the denoising model up to epoch ``epoch_num``.

  Training resumes from the newest checkpoint. Every epoch is logged; every
  20th epoch and the last one a checkpoint is saved and graph generation is
  evaluated with normal and wild inference.

  Args:
    train_loader: Loader of training batches; its ``dataset`` is passed to PNAnet.
    epoch_num: Epoch index at which training stops (divided by 10 when DEBUG is set).
    model_disc: Optional discriminator passed on to train_epoch.

  Returns:
    The trained model.
  """
  print("train base model")
  if DEBUG:
    epoch_num = int(epoch_num/10)

  dataset_train = train_loader.dataset
  model_base = PNAnet(dataset_train)
  model_base = model_base.to(DEVICE)

  # NOTE(review): the learning rate is deliberately scaled by 0.01 here (original remark: "ok makes no sense").
  optimizer = Adam(model_base.parameters(), lr = LEARNING_RATE_GEN*0.01)
  loss_list = list()
  model_base, optimizer, loss_list, epoch_start = load_latest_checkpoint(model_base, optimizer, loss_list, epoch_i=0)

  epoch_start = min(epoch_start, epoch_num)
  print("from", epoch_start, "to", epoch_num)

  for epoch_i in range(epoch_start,epoch_num):
    try:
      loss, loss_start, time_elapsed = train_epoch(model_base, train_loader, optimizer, model_disc=model_disc)
      loss_list.append((epoch_i, loss))

      # loss_list already contains the current epoch, so it is counted once.
      mean_loss = np.mean([y for _, y in loss_list])
      print(f"loss in epoch {epoch_i:07} is: {loss:05.4f} with mean loss {mean_loss:05.4f} with start loss {loss_start:05.4f} with runtime {time_elapsed:05.4f}")
      log({"step": epoch_i, "epoch": epoch_i, "loss": loss, "mean_loss": mean_loss, "start_loss": loss_start, "runtime": time_elapsed})

      if (epoch_i % 20 == 0 and epoch_i > 0) or epoch_i == epoch_num - 1 or BATCH_SIZE == 1:
        print("save")
        save_model(model_base, optimizer, loss_list, epoch_i+1, upload=epoch_i % 100 == 0 and epoch_i>9) #todo really +1?
        time.sleep(0.1)
        frac, smiles_list, unique_frac = test_graph_generation(wild=False)
        frac_wild, smiles_list_wild, unique_frac_wild = test_graph_generation(wild=True)
        print("frac correct graphs: ", frac, "with wild inference", frac_wild)
        log({"step": epoch_i, "epoch": epoch_i, "frac_normal": frac, "frac_wild": frac_wild, "frac_normal_unique": unique_frac, "frac_wild_unique": unique_frac_wild})
        log_smiles(smiles_list, f"{PATH_PATTERN}_smiles_{epoch_i}_normal.txt")
        log_smiles(smiles_list_wild, f"{PATH_PATTERN}_smiles_{epoch_i}_wild.txt")
        try:
          print(smiles_list[:20])
          print(smiles_list_wild[:20])
        except Exception as e:
          print(e)

    except Exception as e:
      print("An error occurred during training: \n", str(e))
      traceback.print_exc()
      raise

  return model_base

In [None]:
def start_experiments():
  """Run the complete training schedule and return the final base model.

  The base model is first trained alone for EPOCHS_GEN epochs. Afterwards four
  rounds follow; in each round a discriminator is obtained from run_disc
  (skipped when BASELINE is set, in which case None is passed on) and the base
  model is trained further with that discriminator.

  The global DISC_NOISE is halved before round 3 and again before round 4.

  NOTE(review): rounds 3 and 4 both use epoch_num = EPOCHS_GEN*4; this is kept
  exactly as in the original schedule -- confirm that it is intended.
  """
  global DISC_NOISE
  # Only the first returned dataset is used in this function.
  dataset_base, _ = build_dataset()
  dataloader_base = DataLoader(dataset_base, batch_size=BATCH_SIZE, shuffle=True)
  model_base = train_base_model(dataloader_base, epoch_num = EPOCHS_GEN*1)

  # One entry per round:
  # (round index, multiplier for EPOCHS_GEN, factor for DISC_NOISE or None to leave it untouched)
  round_schedule = (
    (1, 2, None),
    (2, 3, None),
    (3, 4, 0.5),
    (4, 4, 0.5),
  )
  for round_i, epoch_factor, noise_factor in round_schedule:
    if noise_factor is not None:
      DISC_NOISE = DISC_NOISE*noise_factor
    # The baseline variant trains without any discriminator.
    model_disc = None if BASELINE else run_disc(round_i=round_i)
    model_base = train_base_model(dataloader_base, epoch_num = EPOCHS_GEN*epoch_factor, model_disc=model_disc)

  save_src_file() # do it again
  return model_base


#0000390 is the last good one

#model_base = start_experiments()# loss in epoch 0000410 is: 0.0486 with mean loss 0.0643 with start loss 1.6791 with runtime 18.3035

In [None]:
#!rm aliamol_model_epoch_00004001_010000_generated.pickle aliamol_model_epoch_00004001.pth


### With WandB

In [None]:
import wandb
# Print the filesystem location(s) of the imported wandb package to check which installation is in use.
print(wandb.__path__)

['/usr/local/lib/python3.10/dist-packages/wandb']


In [None]:
# Configuration handed to wandb.sweep(). Every key under "parameters" is copied
# into a module-level global of the same name by main() before training starts.
sweep_config = {
    "name": "AliaMol",
    "method": "random",
    "metric": {
        # "frac_normal" is the fraction of correct generated graphs that the
        # training loop logs together with "frac_wild". The previous value
        # ("ENZYMES/besttest_acc") is never logged by the code in this notebook.
        # NOTE(review): assumes log() forwards its keys to W&B unchanged -- confirm.
        "name": "frac_normal",
        "goal": "maximize",
    },
    "parameters": {
        "BATCH_SIZE": {"values": [128*2]},
        "GAMMA": {"values": [0.1]},
        # Start value; start_experiments() halves the DISC_NOISE global before rounds 3 and 4.
        # NOTE(review): this entry was previously marked "unused" -- confirm whether
        # anything actually consumes DISC_NOISE.
        "DISC_NOISE": {"values": [0.3]},
        "EPOCHS_DISC_MODEL": {"values": [100]},
        "EPOCHS_GEN": {"values": [100]},
    },
}

In [None]:
def save_src_file():
  """Log the pip package list and all local source/text files as W&B artifacts.

  First writes the output of ``pip list`` to pip_list.txt, then logs every
  *.txt, *.ipynb and *.py file of the current working directory (in that
  order, each group sorted by file name) via wandb.log_artifact. The artifact
  name is built from the extension, the global SWEEP_ID and the file name
  reduced to its alphanumeric characters.
  """
  os.system("pip list > pip_list.txt 2>&1")
  for ext in ("txt", "ipynb", "py"):
    for path in sorted(glob.glob(f"*.{ext}")):
      # Keep only alphanumeric characters of the file name for the artifact name.
      alnum_name = "".join(ch for ch in path if ch.isalnum())
      wandb.log_artifact(path, name=f"src_{ext}_{SWEEP_ID}_{alnum_name}", type=f"my_dataset_{ext}")




In [None]:
#! cp ../Insa/api_key.txt api_key.txt

In [None]:
#os.system('wandb login --relogin --host=https://api.wandb.ai --key='+get_wand_api_key())

In [None]:
def get_wand_api_key():
  """Return the W&B API key stored in api_key.txt in the working directory.

  Outside of Google Colab the file is first refreshed from ~/api_key.txt.
  That copy is best effort: if it fails, a message is printed and whatever
  api_key.txt already exists in the working directory is used.

  Returns:
    The file content with surrounding whitespace removed.

  Raises:
    OSError: if api_key.txt cannot be opened for reading.
  """
  import shutil
  import sys

  file_path = 'api_key.txt'
  if 'google.colab' not in sys.modules:
    home_copy = os.path.join(os.path.expanduser("~"), file_path)
    try:
      # Copy without spawning a shell; works the same on every platform.
      shutil.copyfile(home_copy, file_path)
    except OSError as e:
      # Covers a missing/unreadable source and shutil.SameFileError (an OSError subclass).
      print("could not copy", home_copy, "to", file_path, ":", e)
  with open(file_path, 'r') as file:
    return file.read().strip()

#wandb.login(key=get_wand_api_key())

def main():
  """Execute one sweep run: set up globals from the run config and start training.

  Opens a W&B run, derives the global PATH_PATTERN from PATH_PATTERN_BASE, the
  run name and BASELINE, logs the source files, copies every hyperparameter
  listed in sweep_config['parameters'] from the run config into a module-level
  global of the same name, and returns the result of start_experiments().
  """
  global PATH_PATTERN
  with wandb.init() as run:
    PATH_PATTERN = f"{PATH_PATTERN_BASE}_{run.name}_{BASELINE}"
    save_src_file()
    module_globals = globals()
    for name in sweep_config['parameters']:
      # Overwrite the module-level setting with the value chosen for this run.
      module_globals[name] = run.config[name]
      print("set ", name, "=", run.config[name])
    return start_experiments()

def start_with_wandb(set_baseline_true=False):
  """Create a W&B sweep from sweep_config and run main() for up to 5 runs.

  Args:
    set_baseline_true: when true, the global BASELINE is set to True before
      the sweep starts. It is never reset to False by this function.

  Any exception raised while creating or running the sweep is not propagated:
  its traceback is printed, appended to _error_log.txt, and the function
  returns after a 10 second pause.
  """
  import wandb
  global SWEEP_ID, USE_WANDB, PATH_PATTERN, BASELINE
  if set_baseline_true:
    BASELINE = True
  USE_WANDB = True
  os.environ["WANDB_MODE"] = "online"
  try:
    SWEEP_ID = wandb.sweep(sweep_config, project=PROJECT_NAME)
    wandb.agent(SWEEP_ID, function=main, count=5)
  except Exception:
    trace_text = traceback.format_exc()
    print("final error:\n", trace_text)
    # Keep a persistent record, since notebook output may get lost.
    with open('_error_log.txt', 'a') as log_file:
      log_file.write(f"{trace_text}\n")
    time.sleep(10)


In [None]:
# First sweep uses the current BASELINE setting; the second call forces BASELINE to True
# (start_with_wandb does not reset it afterwards).
start_with_wandb()
start_with_wandb(set_baseline_true=True)

Create sweep with ID: 7ru13mbd
Sweep URL: https://wandb.ai/nextaid/AliaMoleculePaper/sweeps/7ru13mbd


[34m[1mwandb[0m: Agent Starting Run: 1n572s8b with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	DISC_NOISE: 0.3
[34m[1mwandb[0m: 	EPOCHS_DISC_MODEL: 100
[34m[1mwandb[0m: 	EPOCHS_GEN: 100
[34m[1mwandb[0m: 	GAMMA: 0.2
[34m[1mwandb[0m: Currently logged in as: [33mgerritgr[0m ([33mnextaid[0m). Use [1m`wandb login --relogin`[0m to force relogin


set  BATCH_SIZE = 256
set  GAMMA = 0.2
set  DISC_NOISE = 0.3
set  EPOCHS_DISC_MODEL = 100
set  EPOCHS_GEN = 100
try to read  dataset.pickle
train base model
try to read  deg.pickle
from 0 to 10


100%|██████████| 419/419 [00:24<00:00, 16.82it/s]


loss in epoch 0000000 is: 0.1524 with mean loss 0.1524 with start loss 0.0000 with runtime 24.9154


100%|██████████| 419/419 [00:22<00:00, 18.99it/s]


loss in epoch 0000001 is: 0.0945 with mean loss 0.1138 with start loss 0.0000 with runtime 22.0702


100%|██████████| 419/419 [00:21<00:00, 19.34it/s]


loss in epoch 0000002 is: 0.0847 with mean loss 0.1041 with start loss 0.0000 with runtime 21.6692


100%|██████████| 419/419 [00:21<00:00, 19.06it/s]


loss in epoch 0000003 is: 0.0805 with mean loss 0.0985 with start loss 0.0000 with runtime 21.9890


100%|██████████| 419/419 [00:21<00:00, 19.15it/s]


loss in epoch 0000004 is: 0.0782 with mean loss 0.0947 with start loss 0.0000 with runtime 21.8843


100%|██████████| 419/419 [00:21<00:00, 19.56it/s]


loss in epoch 0000005 is: 0.0763 with mean loss 0.0918 with start loss 0.0000 with runtime 21.4271


100%|██████████| 419/419 [00:21<00:00, 19.46it/s]


loss in epoch 0000006 is: 0.0746 with mean loss 0.0895 with start loss 0.0000 with runtime 21.5337


100%|██████████| 419/419 [00:21<00:00, 19.48it/s]


loss in epoch 0000007 is: 0.0736 with mean loss 0.0876 with start loss 0.0000 with runtime 21.5182


100%|██████████| 419/419 [00:21<00:00, 19.38it/s]


loss in epoch 0000008 is: 0.0732 with mean loss 0.0861 with start loss 0.0000 with runtime 21.6208


100%|██████████| 419/419 [00:21<00:00, 19.78it/s]


loss in epoch 0000009 is: 0.0727 with mean loss 0.0848 with start loss 0.0000 with runtime 21.1935
save
try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000010_004000_wFalse_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000010_004000_wFalse_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000010 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137468], x=[43154, 11], batch=[43154], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.59it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137928], x=[43287, 11], batch=[43287], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.66it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137276], x=[43102, 11], batch=[43102], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.67it/s]


generated graphs  [Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137288], x=[43106, 11], batch=[43106], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.67it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000010_004000_wFalse_generated.pickle


100%|██████████| 4000/4000 [00:16<00:00, 247.70it/s]


try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000010_004000_wTrue_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000010_004000_wTrue_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000010 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 136920], x=[42996, 11], batch=[42996], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.75it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137100], x=[43054, 11], batch=[43054], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.77it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137348], x=[43122, 11], batch=[43122], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.90it/s]


generated graphs  [Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137868], x=[43271, 11], batch=[43271], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.87it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000010_004000_wTrue_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 255.79it/s]


frac correct graphs:  0.263 with wild inference 0.06775
Number of rows must be a positive integer, not 0
Number of rows must be a positive integer, not 0
[('C1CC2CCC3CC23C1', 0), ('CC12CC13C1CC2C13C', 1), ('CCC1C2C(C)C3CC312', 5), ('CC1C2CCC3(C)C1C23', 7), ('CCCC(C)CC(C)C', 12), ('CCC1CCC(C)CC1', 25), ('CCCC(C)C', 30), ('CCCC1CCC(C)C1', 32), ('CC12C3C1C14CC3C12C4', 39), ('C1C2CC3C(C2)C32CC12', 49), ('CC12CC3(C1)C1CCC123', 55), ('CCCCCCC', 63), ('CC1(C2CC2)CCCC1', 66), ('CCCCCCC(C)C', 67), ('CCC(C)C1(C)C2CC21', 71), ('CC1CC23CC14CC2C43', 77), ('CCC12CC3CC1CC32', 78), ('CC(C)C12CC1CC2C', 81), ('CC12C3CCCC1C32', 85), ('CCCCC1(C)CCC1', 86)]
[('CCC1CC2CC23CC13', 4), ('CC1CC(C(C)C)C1', 9), ('CCCCCC1CC1C', 31), ('CC12CCC34C5C1C53C24', 43), ('CCCCC(C)C(C)C', 56), ('CC1CC(C)(C)C12CC2', 64), ('CC1CCCC(C)CC1', 69), ('CCCC(CC)CC', 109), ('CCCC1CCCC1C', 123), ('CCC1(C)CCC1(C)C', 129), ('CC1CCCCC2CC12', 149), ('CCC(C)CC(C)CC', 161), ('CCCCC(C)CCC', 171), ('CCC(C)CC(C)CC', 180), ('CCC(CC)C1CC1C', 186

100%|██████████| 419/419 [00:22<00:00, 18.61it/s]


loss in epoch 0000010 is: 0.0722 with mean loss 0.0837 with start loss 0.0000 with runtime 22.5231


100%|██████████| 419/419 [00:22<00:00, 18.77it/s]


loss in epoch 0000011 is: 0.0715 with mean loss 0.0827 with start loss 0.0000 with runtime 22.3263


100%|██████████| 419/419 [00:21<00:00, 19.23it/s]


loss in epoch 0000012 is: 0.0708 with mean loss 0.0818 with start loss 0.0000 with runtime 21.7951


100%|██████████| 419/419 [00:21<00:00, 19.46it/s]


loss in epoch 0000013 is: 0.0702 with mean loss 0.0810 with start loss 0.0000 with runtime 21.5354


100%|██████████| 419/419 [00:21<00:00, 19.39it/s]


loss in epoch 0000014 is: 0.0700 with mean loss 0.0803 with start loss 0.0000 with runtime 21.6094


100%|██████████| 419/419 [00:21<00:00, 19.36it/s]


loss in epoch 0000015 is: 0.0698 with mean loss 0.0797 with start loss 0.0000 with runtime 21.6444


100%|██████████| 419/419 [00:21<00:00, 19.31it/s]


loss in epoch 0000016 is: 0.0697 with mean loss 0.0791 with start loss 0.0000 with runtime 21.6988


100%|██████████| 419/419 [00:21<00:00, 19.66it/s]


loss in epoch 0000017 is: 0.0696 with mean loss 0.0786 with start loss 0.0000 with runtime 21.3130


100%|██████████| 419/419 [00:21<00:00, 19.54it/s]


loss in epoch 0000018 is: 0.0692 with mean loss 0.0781 with start loss 0.0000 with runtime 21.4519


100%|██████████| 419/419 [00:21<00:00, 19.24it/s]


loss in epoch 0000019 is: 0.0692 with mean loss 0.0777 with start loss 0.0000 with runtime 21.7796
save
try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000020_004000_wFalse_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000020_004000_wFalse_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000020 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 138176], x=[43358, 11], batch=[43358], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.81it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138236], x=[43375, 11], batch=[43375], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.71it/s]


generated graphs  [Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137800], x=[43251, 11], batch=[43251], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.81it/s]


generated graphs  [Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137100], x=[43051, 11], batch=[43051], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.87it/s]


generated graphs  [Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000020_004000_wFalse_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 253.94it/s]


try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000020_004000_wTrue_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000020_004000_wTrue_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000020 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 138884], x=[43557, 11], batch=[43557], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.85it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137728], x=[43229, 11], batch=[43229], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 18.09it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 40], x=[15, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137856], x=[43268, 11], batch=[43268], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.83it/s]


generated graphs  [Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138084], x=[43331, 11], batch=[43331], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.91it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000020_004000_wTrue_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 251.30it/s]


frac correct graphs:  0.311 with wild inference 0.2695
Number of rows must be a positive integer, not 0
Number of rows must be a positive integer, not 0
[('CCCCC1CC(C)C1', 6), ('CCOCCC1CC1', 8), ('OCC12CCC3(C1)OC23', 11), ('CCC(O)CCO', 14), ('CC1C2CC23C(C)OC13', 24), ('CC12C(O)C1C1CCC12', 30), ('CC1CC(C)C1CCO', 34), ('CCC12CC1OC2CO', 37), ('CC1C2COC1C2CO', 38), ('CC12COC3CC31O2', 40), ('OC1C(O)C2(O)CCC12', 41), ('OC1OC2COC3C1C23', 47), ('CCC(O)CC(O)CO', 49), ('CC12CC13CC1C(C3)C12', 53), ('CC12CC1OCC2CO', 55), ('CCC1CC2(C)CC12C', 56), ('CC1CCC23CC1C2C3', 60), ('OCC12CC13C2C31CO1', 62), ('CC12CCC3CC1C32', 63), ('CCC(C)CC(C)O', 64)]
[('CC1C2C1C2(C)CCO', 0), ('CCC1(CC)CC1CO', 1), ('CC(O)C1CCC2CC21', 9), ('CCC(C)CC1CCC1', 12), ('CCC1CC2(C)CC1C2', 13), ('CC12CC3CCC1C32C', 19), ('CC1CC2C3CC23C1', 24), ('OCC12CC13CC1C2C13', 28), ('C1CC2C1C1C34CC3C214', 29), ('OC12COC3CC1CC32', 30), ('C1CC(CC2CC2)C1', 31), ('OCCCCCCCO', 34), ('OC1C2CC13CCC23', 41), ('CC1C2CCCC23CC13', 55), ('CC1C(C)C1(C)C1CC1',

100%|██████████| 419/419 [00:21<00:00, 19.16it/s]


loss in epoch 0000020 is: 0.0687 with mean loss 0.0773 with start loss 0.0000 with runtime 21.8711
save
try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000021_004000_wFalse_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000021_004000_wFalse_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000021 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137240], x=[43093, 11], batch=[43093], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.78it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 60], x=[21, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137728], x=[43232, 11], batch=[43232], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.65it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 135844], x=[42696, 11], batch=[42696], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.72it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138032], x=[43317, 11], batch=[43317], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:57<00:00, 17.54it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000021_004000_wFalse_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 252.36it/s]


try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000021_004000_wTrue_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000021_004000_wTrue_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000021 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137980], x=[43301, 11], batch=[43301], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.80it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137620], x=[43201, 11], batch=[43201], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.86it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137992], x=[43305, 11], batch=[43305], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.94it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 136812], x=[42971, 11], batch=[42971], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.97it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000021_004000_wTrue_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 257.34it/s]


frac correct graphs:  0.182 with wild inference 0.0145
Number of rows must be a positive integer, not 0
Number of rows must be a positive integer, not 0
[('OCC(O)CO', 3), ('CC(O)(O)C1CC1(C)O', 4), ('CC1C2COC1(O)CO2', 6), ('CC12C3CC4C15CC5C432', 11), ('CCCC1CC(CC)C1', 20), ('CC(O)COC1(C)CO1', 28), ('CCC1(C)CC2(C)CC12', 34), ('OOC1C(O)C2C3OC132', 40), ('CCC(O)CCO', 45), ('OCC1CC2CCC2O1', 46), ('COCC(C)OC1CC1', 59), ('CCC(C)C(O)CO', 68), ('COC(C)(O)OC(C)O', 69), ('CC1CC23CC24CC134', 73), ('CCOC(C)C(O)O', 74), ('OCCC(CO)CCO', 85), ('OCCC1CC1(O)CO', 89), ('OCC1CC23C4C2(O)C143', 98), ('CC1(CCC2CC2)CO1', 106), ('CC12COC(C)(O)C1C2', 112)]
[('CCC1(O)CCCC1', 417), ('CC(C)C(C)C(O)O', 425), ('CC1CC(C)C(C)C1', 699), ('CCCC(C)(O)CO', 751), ('CCC(O)CC1CC1', 769), ('CC1C(CO)C1CCO', 785), ('OCC1CC(O)C1', 848), ('CCCCC(C)CC', 945), ('CCCC1CC2CC12', 1252), ('CC(CO)C(C)OCO', 1282), ('OCCCC(O)CCO', 1283), ('CC(O)C1(C)CC1O', 1302), ('CC(CO)C(O)CCO', 1338), ('CCC(O)CC(C)CO', 1347), ('OCCCC(O)CO', 1431), ('OC

100%|██████████| 419/419 [00:22<00:00, 18.69it/s]


loss in epoch 0000021 is: 0.0687 with mean loss 0.0769 with start loss 0.0000 with runtime 22.4195


100%|██████████| 419/419 [00:22<00:00, 19.03it/s]


loss in epoch 0000022 is: 0.0687 with mean loss 0.0766 with start loss 0.0000 with runtime 22.0218


100%|██████████| 419/419 [00:22<00:00, 18.80it/s]


loss in epoch 0000023 is: 0.0684 with mean loss 0.0762 with start loss 0.0000 with runtime 22.2981


100%|██████████| 419/419 [00:22<00:00, 18.63it/s]


loss in epoch 0000024 is: 0.0684 with mean loss 0.0759 with start loss 0.0000 with runtime 22.4937


100%|██████████| 419/419 [00:22<00:00, 18.75it/s]


loss in epoch 0000025 is: 0.0684 with mean loss 0.0756 with start loss 0.0000 with runtime 22.3576


100%|██████████| 419/419 [00:22<00:00, 18.69it/s]


loss in epoch 0000026 is: 0.0681 with mean loss 0.0754 with start loss 0.0000 with runtime 22.4189


100%|██████████| 419/419 [00:23<00:00, 17.85it/s]


loss in epoch 0000027 is: 0.0680 with mean loss 0.0751 with start loss 0.0000 with runtime 23.4786


100%|██████████| 419/419 [00:23<00:00, 18.16it/s]


loss in epoch 0000028 is: 0.0679 with mean loss 0.0749 with start loss 0.0000 with runtime 23.0754


100%|██████████| 419/419 [00:22<00:00, 18.76it/s]


loss in epoch 0000029 is: 0.0678 with mean loss 0.0746 with start loss 0.0000 with runtime 22.3355
save
try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000030_004000_wFalse_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000030_004000_wFalse_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000030 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137768], x=[43242, 11], batch=[43242], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:57<00:00, 17.53it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138140], x=[43348, 11], batch=[43348], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.58it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137920], x=[43284, 11], batch=[43284], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:57<00:00, 17.50it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 136952], x=[43013, 11], batch=[43013], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:57<00:00, 17.45it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000030_004000_wFalse_generated.pickle


100%|██████████| 4000/4000 [00:16<00:00, 246.22it/s]


try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000030_004000_wTrue_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000030_004000_wTrue_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000030 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 138236], x=[43374, 11], batch=[43374], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.58it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137272], x=[43100, 11], batch=[43100], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.79it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137644], x=[43207, 11], batch=[43207], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.72it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137168], x=[43071, 11], batch=[43071], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.78it/s]


generated graphs  [Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000030_004000_wTrue_generated.pickle


100%|██████████| 4000/4000 [00:16<00:00, 249.34it/s]


frac correct graphs:  0.2805 with wild inference 0.35875
Number of rows must be a positive integer, not 0
Number of rows must be a positive integer, not 0
[('CCC1C(C)C1(C)CO', 0), ('CC12CCOC1(CO)C2', 1), ('CCC(O)C1CCC1C', 2), ('CC1CCC1(C)C', 4), ('CC1CC12CC2CO', 6), ('CCCC1CC(CC)C1', 8), ('CC12CC34CC3CC14C2', 23), ('CC1C2CC3CC3OC12', 28), ('COCC(O)(O)C(C)O', 36), ('CC1CC2CC2(CO)O1', 41), ('CCC(C)C(C)O', 42), ('CC(O)C1CC1C(O)O', 45), ('OC1OCC12CC1CC12', 48), ('COCC1COC1(C)O', 49), ('CCC(OC)C1(C)CC1', 53), ('CC(CCCO)C1CO1', 54), ('CC1CC1CC(C)(O)O', 55), ('CCCC(C)OCCO', 57), ('CCCC1CCCC1', 58), ('C1C2C1C13CC14C(O3)C24', 59)]
[('CC1OC2CC1CCO2', 3), ('CCCC1CCC1C', 7), ('CC1CC1C1CC12CC2', 11), ('CC(CO)C1OCC1O', 12), ('CC1C2CC3CC1C3C2', 15), ('CCC1CC2OC(C)C12', 18), ('OC12CCC3C4C1C342', 19), ('CC(O)C(O)C(C)CO', 26), ('CC1CCC2C(C)C12', 28), ('CCC1CC2C3CC3C12', 29), ('COC(CO)C1CCC1', 40), ('CC1CC23C(O)CC2C13', 41), ('O1C2C3C1C1C4OC41C23', 45), ('C1CCC2CC2CC1', 46), ('CC1C2COCC3C1C23', 47), ('CC

100%|██████████| 419/419 [00:23<00:00, 18.18it/s]


loss in epoch 0000030 is: 0.0676 with mean loss 0.0744 with start loss 0.0000 with runtime 23.0541


100%|██████████| 419/419 [00:22<00:00, 18.33it/s]


loss in epoch 0000031 is: 0.0675 with mean loss 0.0742 with start loss 0.0000 with runtime 22.8656


100%|██████████| 419/419 [00:22<00:00, 19.03it/s]


loss in epoch 0000032 is: 0.0675 with mean loss 0.0740 with start loss 0.0000 with runtime 22.0255


100%|██████████| 419/419 [00:21<00:00, 19.15it/s]


loss in epoch 0000033 is: 0.0673 with mean loss 0.0738 with start loss 0.0000 with runtime 21.8894


100%|██████████| 419/419 [00:22<00:00, 18.99it/s]


loss in epoch 0000034 is: 0.0672 with mean loss 0.0736 with start loss 0.0000 with runtime 22.0681


100%|██████████| 419/419 [00:22<00:00, 18.98it/s]


loss in epoch 0000035 is: 0.0670 with mean loss 0.0734 with start loss 0.0000 with runtime 22.0762


100%|██████████| 419/419 [00:21<00:00, 19.07it/s]


loss in epoch 0000036 is: 0.0671 with mean loss 0.0733 with start loss 0.0000 with runtime 21.9751


100%|██████████| 419/419 [00:21<00:00, 19.12it/s]


loss in epoch 0000037 is: 0.0668 with mean loss 0.0731 with start loss 0.0000 with runtime 21.9147


100%|██████████| 419/419 [00:21<00:00, 19.26it/s]


loss in epoch 0000038 is: 0.0668 with mean loss 0.0729 with start loss 0.0000 with runtime 21.7635


100%|██████████| 419/419 [00:22<00:00, 18.90it/s]


loss in epoch 0000039 is: 0.0667 with mean loss 0.0728 with start loss 0.0000 with runtime 22.1790
save
try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000040_004000_wFalse_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000040_004000_wFalse_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000040 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137448], x=[43152, 11], batch=[43152], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.66it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138272], x=[43384, 11], batch=[43384], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:57<00:00, 17.38it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137364], x=[43126, 11], batch=[43126], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:57<00:00, 17.53it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138016], x=[43310, 11], batch=[43310], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:58<00:00, 17.18it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000040_004000_wFalse_generated.pickle


100%|██████████| 4000/4000 [00:16<00:00, 245.53it/s]


try to read  aliamol_paper_comfy-sweep-1_True_model_epoch_00000040_004000_wTrue_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_comfy-sweep-1_True_model_epoch_00000040_004000_wTrue_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000040 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 138208], x=[43367, 11], batch=[43367], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.70it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137396], x=[43137, 11], batch=[43137], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.86it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138484], x=[43446, 11], batch=[43446], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.75it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137260], x=[43098, 11], batch=[43098], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.79it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_comfy-sweep-1_True_model_epoch_00000040_004000_wTrue_generated.pickle


100%|██████████| 4000/4000 [00:16<00:00, 247.17it/s]


frac correct graphs:  0.11725 with wild inference 0.018
Number of rows must be a positive integer, not 0
Number of rows must be a positive integer, not 0
[('C1CC2CCC(C1)C2', 2), ('CC1(CO)CCC1O', 9), ('OCC1CC1C1OCO1', 34), ('CCOC1(CC)C=C1O', 38), ('CCC(O)CCC(O)O', 53), ('CC1C(O)N1C1CC1', 66), ('CCC(C=O)C(O)C=O', 74), ('OCC1C2NC(O)(O)C12', 87), ('CCC1C2C(C(=O)O)C12', 88), ('CCC1C(O)CC1CO', 92), ('CCOC(O)(C=O)OC', 93), ('O=C1COCCC1CO', 99), ('CC12COC13C1OC3C12', 104), ('CC1CC12CCCC=N2', 109), ('OC12CC3C4C1C2C34O', 131), ('CCCCCCCCC', 167), ('OCCC(O)OO', 182), ('CCC12C(O)C3CC1C32', 191), ('O=CCC12CC1(O)C2O', 193), ('C1C2OC3CC45C2N4C135', 201)]
[('OCCC(O)C(O)CO', 8), ('N1=NN1n1nnnn1', 41), ('CCCC(C)C1(O)CO1', 59), ('CC(O)c1nn2nnn12', 109), ('COCC(O)C(O)(O)O', 116), ('CCCC(C)CO', 199), ('CCC1CC2CC1C2', 259), ('COC1(O)COCC1=O', 262), ('CCC(CC)C(O)O', 296), ('CCC1(O)CC1OCO', 306), ('O=C1CCOC=NN=N1', 319), ('OCCCCCOCO', 400), ('OCCC1(O)CCCO1', 406), ('OCCC1CC1C1CO1', 490), ('n1nnn2nnn2n1', 657)

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
frac_normal,▆█▃▇▁
frac_normal_unique,▆█▃▇▁
frac_wild,▂▆▁█▁
frac_wild_unique,▂▆▁█▁
loss,█▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
mean_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
runtime,█▃▂▂▂▁▂▂▂▁▄▃▂▂▂▂▂▁▁▂▂▃▃▃▃▃▃▅▅▃▄▄▃▂▃▃▂▂▂▃
start_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
step,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
epoch,39.0
frac_normal,0.11725
frac_normal_unique,0.11725
frac_wild,0.018
frac_wild_unique,0.018
loss,0.06674
mean_loss,0.07278
runtime,22.17898
start_loss,0.0
step,39.0


[34m[1mwandb[0m: Agent Starting Run: 0bxew7tz with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	DISC_NOISE: 0.3
[34m[1mwandb[0m: 	EPOCHS_DISC_MODEL: 100
[34m[1mwandb[0m: 	EPOCHS_GEN: 100
[34m[1mwandb[0m: 	GAMMA: 0.2


set  BATCH_SIZE = 256
set  GAMMA = 0.2
set  DISC_NOISE = 0.3
set  EPOCHS_DISC_MODEL = 100
set  EPOCHS_GEN = 100
try to read  dataset.pickle
train base model
try to read  deg.pickle
from 0 to 10


100%|██████████| 419/419 [00:21<00:00, 19.23it/s]


loss in epoch 0000000 is: 0.1834 with mean loss 0.1834 with start loss 0.0000 with runtime 21.7935


100%|██████████| 419/419 [00:21<00:00, 19.55it/s]


loss in epoch 0000001 is: 0.0995 with mean loss 0.1275 with start loss 0.0000 with runtime 21.4348


100%|██████████| 419/419 [00:21<00:00, 19.47it/s]


loss in epoch 0000002 is: 0.0862 with mean loss 0.1138 with start loss 0.0000 with runtime 21.5214


100%|██████████| 419/419 [00:21<00:00, 19.22it/s]


loss in epoch 0000003 is: 0.0855 with mean loss 0.1080 with start loss 0.0000 with runtime 21.8031


100%|██████████| 419/419 [00:22<00:00, 18.95it/s]


loss in epoch 0000004 is: 0.0806 with mean loss 0.1026 with start loss 0.0000 with runtime 22.1123


100%|██████████| 419/419 [00:22<00:00, 18.94it/s]


loss in epoch 0000005 is: 0.0746 with mean loss 0.0978 with start loss 0.0000 with runtime 22.1319


100%|██████████| 419/419 [00:22<00:00, 18.50it/s]


loss in epoch 0000006 is: 0.0726 with mean loss 0.0944 with start loss 0.0000 with runtime 22.6539


100%|██████████| 419/419 [00:22<00:00, 18.43it/s]


loss in epoch 0000007 is: 0.0717 with mean loss 0.0918 with start loss 0.0000 with runtime 22.7467


100%|██████████| 419/419 [00:22<00:00, 18.54it/s]


loss in epoch 0000008 is: 0.0711 with mean loss 0.0896 with start loss 0.0000 with runtime 22.6005


100%|██████████| 419/419 [00:22<00:00, 18.53it/s]


loss in epoch 0000009 is: 0.0709 with mean loss 0.0879 with start loss 0.0000 with runtime 22.6201
save
try to read  aliamol_paper_bumbling-sweep-2_True_model_epoch_00000010_004000_wFalse_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_bumbling-sweep-2_True_model_epoch_00000010_004000_wFalse_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000010 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137200], x=[43081, 11], batch=[43081], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.65it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 136844], x=[42981, 11], batch=[42981], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.78it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137056], x=[43041, 11], batch=[43041], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.64it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 136820], x=[42972, 11], batch=[42972], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.72it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_bumbling-sweep-2_True_model_epoch_00000010_004000_wFalse_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 253.30it/s]


try to read  aliamol_paper_bumbling-sweep-2_True_model_epoch_00000010_004000_wTrue_generated.pickle
An error occurred: [Errno 2] No such file or directory: 'aliamol_paper_bumbling-sweep-2_True_model_epoch_00000010_004000_wTrue_generated.pickle'
try to read  dataset.pickle
try to read  deg.pickle
read checkpoint of epoch 00000010 from disc.
generate samples batched
load g DataBatch(edge_index=[2, 137572], x=[43187, 11], batch=[43187], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.85it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137796], x=[43248, 11], batch=[43248], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.92it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 112], x=[36, 11]), Data(edge_index=[2, 112], x=[36, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 138156], x=[43351, 11], batch=[43351], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:56<00:00, 17.85it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 84], x=[28, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 60], x=[21, 11]), Data(edge_index=[2, 112], x=[36, 11])]
generate samples batched
load g DataBatch(edge_index=[2, 137496], x=[43164, 11], batch=[43164], ptr=[1001]) tensor([  0,   0,   0,  ..., 999, 999, 999], device='cuda:0')


100%|██████████| 1000/1000 [00:55<00:00, 17.89it/s]


generated graphs  [Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 60], x=[21, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11]), Data(edge_index=[2, 144], x=[45, 11])]
try to write  aliamol_paper_bumbling-sweep-2_True_model_epoch_00000010_004000_wTrue_generated.pickle


100%|██████████| 4000/4000 [00:15<00:00, 253.29it/s]


frac correct graphs:  0.32025 with wild inference 0.198
Number of rows must be a positive integer, not 0
Number of rows must be a positive integer, not 0
[('CCC(C)CCC1CC1', 1), ('CC1CCC12CCCC2', 2), ('CCC1C23CC4(C)C2C143', 5), ('CCC(C)CCC(C)C', 6), ('CC12CCCC3C(C1)C32', 10), ('CC1(C)CCCC1(C)C', 12), ('CC1CCC2C(C)C12C', 13), ('CC1C2CC2CC1(C)C', 14), ('CCC1C2C(CC)C12C', 18), ('CCC1CC2C3C1C23', 19), ('CC12CC3CC3(C)C1C2', 22), ('CCCC(C)C(C)CC', 23), ('CC1C2(C)CCC12C', 30), ('CCC12C3CC4C35C1C425', 31), ('CC(C)C(C)(C)C1CC1', 33), ('CC12CC3C4C1C1C4C312', 34), ('CCCC1CC2CC1C2', 40), ('CCC12CCC1C2C', 43), ('CC1C2(C)C3C4C3(C)C142', 52), ('CC(C)CC12C3C1C32C', 54)]
[('CCC1CC(CC)C1C', 3), ('CC(C)CCC1CC1C', 10), ('C1CCCCCCC1', 12), ('CCCC(C)(C)CC', 14), ('CCCCC(C)CCC', 19), ('CCCCC(C)CC', 24), ('CCC1(C)CCC(C)C1', 33), ('CC1CCCC(C)C1C', 34), ('CCCCCCCC', 39), ('CCCC1CC(C)C1C', 48), ('CCCCC1CC(C)C1', 65), ('CCC12C(C)CC1C2C', 69), ('CCCC(C)CC1CC1', 70), ('CCCCC1(C)CC1C', 72), ('CCCCC1(C)CC1C', 74), ('C

100%|██████████| 419/419 [00:22<00:00, 18.81it/s]


loss in epoch 0000010 is: 0.0705 with mean loss 0.0864 with start loss 0.0000 with runtime 22.2800


100%|██████████| 419/419 [00:22<00:00, 18.86it/s]


loss in epoch 0000011 is: 0.0703 with mean loss 0.0852 with start loss 0.0000 with runtime 22.2196


100%|██████████| 419/419 [00:22<00:00, 18.95it/s]


loss in epoch 0000012 is: 0.0701 with mean loss 0.0841 with start loss 0.0000 with runtime 22.1116


100%|██████████| 419/419 [00:22<00:00, 18.77it/s]


loss in epoch 0000013 is: 0.0700 with mean loss 0.0831 with start loss 0.0000 with runtime 22.3275


100%|██████████| 419/419 [00:22<00:00, 18.77it/s]


loss in epoch 0000014 is: 0.0698 with mean loss 0.0823 with start loss 0.0000 with runtime 22.3293


100%|██████████| 419/419 [00:22<00:00, 18.55it/s]


loss in epoch 0000015 is: 0.0697 with mean loss 0.0815 with start loss 0.0000 with runtime 22.5948


100%|██████████| 419/419 [00:22<00:00, 18.84it/s]


loss in epoch 0000016 is: 0.0695 with mean loss 0.0809 with start loss 0.0000 with runtime 22.2448


100%|██████████| 419/419 [00:22<00:00, 18.44it/s]


loss in epoch 0000017 is: 0.0694 with mean loss 0.0803 with start loss 0.0000 with runtime 22.7315


 29%|██▉       | 122/419 [00:06<00:16, 18.56it/s]

In [None]:
#!rm *.pth