In [1]:
import torch
from tqdm import tqdm
import os
from torch_sparse import SparseTensor
from tqdm import tqdm

In [5]:
import os.path

import torch
from torch_scatter import scatter
from torch_sparse import SparseTensor
from math import pi as PI
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm import tqdm
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

def get_dist(pos,i,j):
    dist = (pos[i] - pos[j]).pow(2).sum(dim=-1).sqrt()
    return dist

def get_angle(pos,i,j,k):
        # Calculate angles. 0 to pi
    pos_ji = pos[i] - pos[j]
    pos_jk = pos[k] - pos[j]
    a = (pos_ji * pos_jk).sum(dim=-1) # cos_angle * |pos_ji| * |pos_jk|
    b = torch.cross(pos_ji, pos_jk).norm(dim=-1) # sin_angle * |pos_ji| * |pos_jk|
    angle = torch.atan2(b, a)
    return angle

def get_torsion(pos,k,i,j,t):
    pos_jt = pos[j] - pos[t]
    pos_ji = pos[j] - pos[i]
    pos_jk = pos[j] - pos[k]
    dist_ji = pos_ji.pow(2).sum(dim=-1).sqrt()
    plane1 = torch.cross(pos_ji, pos_jt)
    plane2 = torch.cross(pos_ji, pos_jk)
    a = (plane1 * plane2).sum(dim=-1) # cos_angle * |plane1| * |plane2|
    b = (torch.cross(plane1, plane2) * pos_ji).sum(dim=-1) / dist_ji
    torsion1 = torch.atan2(b, a) # -pi to pi
    torsion1 = torch.abs(torsion1)
    # torsion1[torsion1<=0]+=2*PI # 0 to 2pi
    return torsion1
def generate_dist_index(mol,valid=False):
    atom_num = mol.GetNumAtoms()
    start = torch.arange(atom_num).repeat_interleave(atom_num)
    end = torch.arange(atom_num).repeat(atom_num)
    if valid:
        valid_index = start != end
        start = start[valid_index]
        end = end[valid_index]
    return start,end
def generate_angle_index(mol,valid=False):
    i_all = []
    j_all = []
    k_all = []
    atoms  = [atom for atom in mol.GetAtoms()]
    for index in range(mol.GetNumAtoms()):
        atom = atoms[index]
        set1 = set([bond.GetBeginAtomIdx() for bond in atom.GetBonds()])
        set2 = set([bond.GetEndAtomIdx() for bond in atom.GetBonds()])
        group = list(set1 | set2)
        lth = len(group)
        if len(group) <= 2:
            continue
        i = torch.tensor(group).repeat_interleave(lth)
        j = torch.ones(lth**2,dtype=torch.int64) * index
        k = torch.tensor(group).repeat(lth)
        i_all.append(i)
        j_all.append(j)
        k_all.append(k)
    i_all = torch.cat(i_all)
    j_all = torch.cat(j_all)
    k_all = torch.cat(k_all)
    if valid:
        valid_index = (i_all != k_all) & (i_all != j_all) & (k_all != j_all)
        i_all = i_all[valid_index]
        j_all = j_all[valid_index]
        k_all = k_all[valid_index]
    return i_all,j_all,k_all

def generate_torsion_index(mol,valid=False):
    atoms  = [atom for atom in mol.GetAtoms()]
    group_dict = {}
    bond_start = [bond.GetBeginAtomIdx() for bond in mol.GetBonds()]
    bond_end = [bond.GetEndAtomIdx() for bond in mol.GetBonds()]
    for index in range(mol.GetNumAtoms()):
        atom = atoms[index]
        set1 = set([bond.GetBeginAtomIdx() for bond in atom.GetBonds()])
        set2 = set([bond.GetEndAtomIdx() for bond in atom.GetBonds()])
        group = list(set1 | set2)
        group_dict[index] = group
    i_all = []
    j_all = []
    k_all =[]
    t_all = []
    for atom_i,atom_j in zip(bond_start,bond_end):
        i_group = group_dict[atom_i]
        j_group = group_dict[atom_j]
        k = torch.tensor(i_group).repeat_interleave(len(j_group))
        t = torch.tensor(j_group).repeat(len(i_group))
        i = torch.ones(len(j_group)*len(i_group),dtype=torch.int64) * atom_i
        j = torch.ones(len(j_group)*len(i_group),dtype=torch.int64) * atom_j
        i_all.append(i)
        j_all.append(j)
        k_all.append(k)
        t_all.append(t)
    i_all = torch.cat(i_all)
    j_all = torch.cat(j_all)
    k_all = torch.cat(k_all)
    t_all = torch.cat(t_all)
    if valid:
        valid_index = (k_all != t_all) & (k_all != i_all) & (k_all != j_all) & (t_all != i_all) & (t_all != j_all)
        i_all = i_all[valid_index]
        j_all = j_all[valid_index]
        k_all = k_all[valid_index]
        t_all = t_all[valid_index]
    return k_all,i_all,j_all,t_all


In [7]:
d = "/home/zjh/mr/downstream/dataset/regression"
for p in os.listdir(d):

    dataset_path = os.path.join(d,p)
    if not dataset_path == "/home/zjh/mr/downstream/dataset/regression/FreeSolv.pth":
        continue
    if os.path.isdir(dataset_path):
        continue
    print(dataset_path)
    data = torch.load(dataset_path)

    smiles_list = []
    dist_list = []
    nmr_list = []
    angle_list = []
    torsion_list = []
    pos_list = []
    count =0
    label_list = []

    for row in tqdm(iter(data),total=len(data)):
        smiles = row[0]
        nmr = row[1]
        label = row[2]
        try:
            mol = AllChem.AddHs(Chem.MolFromSmiles(smiles))
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            pos=[]
            edge_index=[]
            num_nodes=len(mol.GetAtoms())
            for i in range(num_nodes):
                for j in range(i):
                    edge_index.append([i,j])
                x,y,z =mol.GetConformer().GetAtomPosition(i)
                pos.append([x,y,z])
            pos = torch.tensor(pos)
            dist_i,dist_j = generate_dist_index(mol,valid=True)
            angle_i,angle_j,angle_k = generate_angle_index(mol,valid=True)
            torsion_k,torsion_i,torsion_j,torsion_t = generate_torsion_index(mol,valid=True)
            dist = get_dist(pos,dist_i,dist_j)
            angle = get_angle(pos,angle_i,angle_j,angle_k)
            torsion = get_torsion(pos,torsion_k,torsion_i,torsion_j,torsion_t)

        except:
            print(smiles)
            continue

        if pos.shape[0] > 127:
            count+=1
            print("f{smiles} out 127")
            print(smiles)
            continue


        if (len(dist)==0 or len(angle)==0 or len(torsion)==0):
            print("valid of length")
            print(smiles)
            continue

        try:
            spt = SparseTensor(row=dist_i.long(),col=dist_j.long(),value=dist)
            dist_matrix = spt.to_dense()
        except:
            print("valid cannot covert adj dense")
            print(smiles)
            continue

        smiles_list.append(smiles)
        dist_list.append(dist_matrix)
        pos_list.append(pos)
        nmr_list.append(nmr)
        angle_list.append([angle_i.byte(),angle_j.byte(),angle_k.byte(),angle])
        label_list.append(label)

        torsion_list.append([torsion_k.byte(),torsion_i.byte(),torsion_j.byte(),torsion_t.byte(),torsion])
        count +=1
    print(f"{len(data) - count} is valid")
    base_dir = os.path.dirname(dataset_path)
    base_name = os.path.basename(dataset_path).split(".")[0]
    save_dir  = os.path.join(base_dir,base_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    smiles_save_path = os.path.join(save_dir,"smiles.pth")
    dist_save_path = os.path.join(save_dir,"dist.pth")
    nmr_save_path= os.path.join(save_dir,"nmr.pth")
    angle_save_path = os.path.join(save_dir,"angle.pth")
    torsion_save_path = os.path.join(save_dir,"torsion.pth")
    pos_save_path = os.path.join(save_dir,"pos.pth")
    label_save_path = os.path.join(save_dir,"label.pth")
    torch.save(smiles_list,smiles_save_path)
    torch.save(dist_list,dist_save_path)
    torch.save(nmr_list,nmr_save_path)
    torch.save(angle_list,angle_save_path)
    torch.save(torsion_list,torsion_save_path)
    torch.save(pos_list,pos_save_path)
    torch.save(label_list,label_save_path)

/home/zjh/mr/downstream/dataset/regression/FreeSolv.pth


  2%|▏         | 16/642 [00:00<00:13, 45.44it/s]

valid of length
C(Br)Br


  8%|▊         | 51/642 [00:01<00:10, 53.77it/s]

valid of length
C(F)(F)(F)F


 11%|█         | 72/642 [00:01<00:11, 49.09it/s]

valid of length
N


 15%|█▍        | 95/642 [00:01<00:09, 56.92it/s]

valid of length
CCl
valid of length
C(I)I


 21%|██▏       | 137/642 [00:02<00:07, 63.93it/s]

valid of length
CI


 29%|██▉       | 188/642 [00:03<00:06, 70.30it/s]

valid of length
C=O


 32%|███▏      | 203/642 [00:03<00:07, 58.02it/s]

valid of length
S


 34%|███▎      | 216/642 [00:03<00:07, 60.67it/s]

valid of length
C(Br)(Br)Br
valid of length
C(F)(F)(F)Br


 39%|███▉      | 251/642 [00:04<00:06, 56.40it/s]

valid of length
C(Cl)Cl


 45%|████▌     | 290/642 [00:04<00:05, 64.93it/s]

valid of length
C(F)(F)Cl
valid of length
C(Cl)(Cl)Cl
valid of length
C


 53%|█████▎    | 342/642 [00:05<00:04, 68.74it/s]

valid of length
C(F)Cl


 56%|█████▌    | 359/642 [00:06<00:04, 66.19it/s]

valid of length
CBr
valid of length
CF


 70%|███████   | 452/642 [00:07<00:03, 57.31it/s]

valid of length
C(Cl)(Cl)(Cl)Cl


100%|██████████| 642/642 [00:11<00:00, 55.91it/s]


18 is valid


In [15]:
d = "/home/zjh/mr/downstream/dataset/regression"
for p in os.listdir(d):

    dataset_path = os.path.join(d,p)
    if dataset_path == "/home/zjh/mr/downstream/dataset/regression/FreeSolv.pth":
        continue
    if os.path.isdir(dataset_path):
        continue
    print(dataset_path)
    data = torch.load(dataset_path)

    smiles_list = []
    dist_list = []
    nmr_list = []
    angle_list = []
    torsion_list = []
    pos_list = []
    count =0
    label_list = []

    for row in tqdm(iter(data),total=len(data)):
        smiles = row[0]
        nmr = row[1]
        label = row[2]
        try:
            mol = AllChem.AddHs(Chem.MolFromSmiles(smiles))
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            pos=[]
            edge_index=[]
            num_nodes=len(mol.GetAtoms())
            for i in range(num_nodes):
                for j in range(i):
                    edge_index.append([i,j])
                x,y,z =mol.GetConformer().GetAtomPosition(i)
                pos.append([x,y,z])
            pos = torch.tensor(pos)
            dist_i,dist_j = generate_dist_index(mol,valid=True)
            angle_i,angle_j,angle_k = generate_angle_index(mol,valid=True)
            torsion_k,torsion_i,torsion_j,torsion_t = generate_torsion_index(mol,valid=True)
            dist = get_dist(pos,dist_i,dist_j)
            angle = get_angle(pos,angle_i,angle_j,angle_k)
            torsion = get_torsion(pos,torsion_k,torsion_i,torsion_j,torsion_t)

        except:
            print(smiles)
            continue

        if pos.shape[0] > 127:
            count+=1
            print("f{smiles} out 127")
            print(smiles)
            continue


        if (len(dist)==0 or len(angle)==0 or len(torsion)==0):
            print("valid of length")
            print(smiles)
            continue

        try:
            spt = SparseTensor(row=dist_i.long(),col=dist_j.long(),value=dist)
            dist_matrix = spt.to_dense()
        except:
            print("valid cannot covert adj dense")
            print(smiles)
            continue

        smiles_list.append(smiles)
        dist_list.append(dist_matrix)
        pos_list.append(pos)
        nmr_list.append(nmr)
        angle_list.append([angle_i.byte(),angle_j.byte(),angle_k.byte(),angle])
        label_list.append(label)

        torsion_list.append([torsion_k.byte(),torsion_i.byte(),torsion_j.byte(),torsion_t.byte(),torsion])
        count +=1
    print(f"{len(data) - count} is valid")
    base_dir = os.path.dirname(dataset_path)
    base_name = os.path.basename(dataset_path).split(".")[0]
    save_dir  = os.path.join(base_dir,base_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    smiles_save_path = os.path.join(save_dir,"smiles.pth")
    dist_save_path = os.path.join(save_dir,"dist.pth")
    nmr_save_path= os.path.join(save_dir,"nmr.pth")
    angle_save_path = os.path.join(save_dir,"angle.pth")
    torsion_save_path = os.path.join(save_dir,"torsion.pth")
    pos_save_path = os.path.join(save_dir,"pos.pth")
    label_save_path = os.path.join(save_dir,"label.pth")
    torch.save(smiles_list,smiles_save_path)
    torch.save(dist_list,dist_save_path)
    torch.save(nmr_list,nmr_save_path)
    torch.save(angle_list,angle_save_path)
    torch.save(torsion_list,torsion_save_path)
    torch.save(pos_list,pos_save_path)
    torch.save(label_list,label_save_path)

/home/zjh/mr/downstream/dataset/regression/Lipophilicity.pth


 11%|█         | 443/4200 [01:00<19:22,  3.23it/s]

f{smiles} out 127
CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]3O[C@H](C)C[C@@H]([C@H]3O)N(C)C)[C@](C)(O)C[C@@H](C)\C(=N/OCOCCOC)\[C@H](C)[C@@H](O)[C@]1(C)O


 29%|██▉       | 1237/4200 [02:54<32:00,  1.54it/s]

f{smiles} out 127
CCn1c(C)c(C(=O)O)c(c2cccc(c2)N3CCN(CC3)c4ccc(NS(=O)(=O)c5ccc(N[C@H](CCN6CCC(O)CC6)CSc7ccccc7)c(c5)S(=O)(=O)C(F)(F)F)cc4)c1c8ccc(Cl)cc8


 33%|███▎      | 1399/4200 [03:21<32:18,  1.44it/s]

f{smiles} out 127
CC[C@H](C)[C@H](N)C1=N[C@@H](CS1)C(=O)N[C@@H](CC(C)C)C(=O)N[C@H](CCC(=O)O)C(=O)N[C@@H]([C@@H](C)CC)C(=O)N[C@H]2CCCCNC(=O)[C@H](CC(=O)N)NC(=O)[C@@H](CC(=O)O)NC(=O)[C@H](Cc3cnc[nH]3)NC(=O)[C@@H](Cc4ccccc4)NC(=O)[C@@H](NC(=O)[C@@H](CCCN)NC2=O)[C@@H](C)CC


 86%|████████▌ | 3594/4200 [08:34<03:24,  2.97it/s]

CCCCCCCCCC(=O)N[C@@H](Cc1c[nH]c2ccccc12)C(=O)N[C@@H](CC(=O)N)C(=O)N[C@@H](CC(=O)O)C(=O)N[C@H]3[C@@H](C)OC(=O)[C@H](CC(=O)c4ccccc4N)NC(=O)[C@@H](NC(=O)[C@@H](CO)NC(=O)CNC(=O)[C@H](CC(=O)O)NC(=O)[C@@H](C)NC(=O)[C@H](CC(=O)O)NC(=O)[C@H](CCCN)NC(=O)CNC3=O)[C@H](C)CC(=O)O


100%|██████████| 4200/4200 [09:59<00:00,  7.00it/s]


1 is valid
/home/zjh/mr/downstream/dataset/regression/LogS.pth


  1%|          | 44/4799 [00:03<04:11, 18.92it/s]

valid of length
ClC(Cl)(Cl)Cl


  4%|▍         | 192/4799 [00:09<00:53, 86.22it/s]

valid of length
ClC
valid of length
BrCBr
valid of length
BrCCl
valid of length
ICI
valid of length
BrC(Br)Br
valid of length
BrC(Cl)Cl
valid of length
ClC(Cl)F
valid of length
ClC(F)F
valid of length
BrC(F)(F)F
valid of length
ClC(Cl)(F)F


 17%|█▋        | 802/4799 [00:26<02:08, 31.07it/s]

valid of length
O=C=O
valid of length
BrC(Br)Cl


 24%|██▍       | 1144/4799 [00:40<01:39, 36.73it/s]

valid of length
BrC(Br)(Br)Br


 26%|██▌       | 1234/4799 [00:43<01:11, 49.76it/s]

valid of length
FC
valid of length
ClCF


 31%|███       | 1469/4799 [00:51<07:02,  7.87it/s]

ClC1(Cl)[C@@]2(Cl)C(Cl)=C(Cl)[C@@]1(Cl)C1C2CO[SH0](=O)OC1


 32%|███▏      | 1530/4799 [00:55<03:27, 15.72it/s]

f{smiles} out 127
O=C(O)[C@@H]1[C@@H](O)C[C@]2(O)OC1C[C@@H](O[C@H]1[C@@H](O)[C@@H](N)[C@H](O)[C@@H](C)O1)/C=C/C=C/C=C/C=C/C=C/C=C/C=C/[C@H](C)[C@@H](O)[C@@H](C)[C@H](C)OC(=O)C[C@H](O)C[C@H](O)CC[C@@H](O)[C@H](O)C[C@H](O)C2


 32%|███▏      | 1535/4799 [00:58<13:19,  4.08it/s]

f{smiles} out 127
O=CCC1C(OC2C(O)C(N(C)C)C(OC3OC(C)C(O)C(O)(C)C3)C(C)O2)C(C)C(O)CC(=O)OC(CC)C(COC2C(OC)C(OC)C(O)C(C)O2)/C=C(\C)/C=C/C(=O)C(C)C1


 32%|███▏      | 1547/4799 [00:58<05:05, 10.66it/s]

valid of length
BrC(F)F


 33%|███▎      | 1582/4799 [01:00<04:50, 11.06it/s]

f{smiles} out 127
Oc1c(C(C)(C)C)cc(Cc2c(C)c(Cc3cc(C(C)(C)C)c(O)c(C(C)(C)C)c3)c(C)c(Cc3cc(C(C)(C)C)c(O)c(C(C)(C)C)c3)c2C)cc1C(C)(C)C


 65%|██████▌   | 3130/4799 [02:20<00:39, 42.02it/s]

valid of length
BrC
valid of length
ClC(Cl)(Cl)F
valid of length
ClC(F)(F)F


 80%|████████  | 3859/4799 [02:53<01:32, 10.18it/s]

valid of length
IC
valid of length
FC(F)(F)F


 88%|████████▊ | 4217/4799 [03:25<15:50,  1.63s/it]

O=C(OC1C[C@@H]2N(C)[C@@H](C1)CC2)C(CO)c1ccccc1


 88%|████████▊ | 4219/4799 [03:25<11:31,  1.19s/it]

valid of length
ClCCl


 90%|█████████ | 4334/4799 [03:32<00:25, 18.39it/s]

valid of length
ClC(Cl)Cl


 90%|█████████ | 4343/4799 [03:34<00:54,  8.36it/s]

valid of length
FC(F)F


100%|██████████| 4799/4799 [03:55<00:00, 20.40it/s]


27 is valid
/home/zjh/mr/downstream/dataset/regression/ESOL.pth


 10%|▉         | 110/1128 [00:04<00:28, 35.50it/s]

valid of length
ClCCl


 16%|█▌        | 175/1128 [00:06<00:40, 23.56it/s]

valid of length
CI


 28%|██▊       | 311/1128 [00:12<00:44, 18.32it/s]

valid of length
ClC(Cl)(Cl)Cl


 32%|███▏      | 358/1128 [00:14<00:28, 27.22it/s]

valid of length
ClC(Br)Br


 36%|███▌      | 403/1128 [00:15<00:22, 32.06it/s]

valid of length
ClCBr


 43%|████▎     | 489/1128 [00:18<00:24, 26.38it/s]

valid of length
BrC(Br)Br


 51%|█████▏    | 579/1128 [00:22<00:26, 20.69it/s]

valid of length
BrC(Cl)Cl
valid of length
CBr


 54%|█████▍    | 614/1128 [00:23<00:14, 36.26it/s]

valid of length
BrCBr


 64%|██████▍   | 724/1128 [00:28<00:20, 19.28it/s]

valid of length
BrC(Br)(Br)Br


 69%|██████▉   | 776/1128 [00:30<00:11, 30.67it/s]

valid of length
ClC(Cl)Cl


 83%|████████▎ | 936/1128 [00:36<00:07, 24.39it/s]

valid of length
C


 96%|█████████▌| 1084/1128 [00:42<00:01, 35.74it/s]

valid of length
ICI


100%|██████████| 1128/1128 [00:44<00:00, 25.28it/s]


13 is valid


In [16]:
d = "/home/zjh/mr/downstream/dataset/classify"
for p in os.listdir(d):

    dataset_path = os.path.join(d,p)
    if dataset_path == "/home/zjh/mr/downstream/dataset/regression/FreeSolv.pth":
        continue
    if os.path.isdir(dataset_path):
        continue
    print(dataset_path)
    data = torch.load(dataset_path)

    smiles_list = []
    dist_list = []
    nmr_list = []
    angle_list = []
    torsion_list = []
    pos_list = []
    count =0
    label_list = []

    for row in tqdm(iter(data),total=len(data)):
        smiles = row[0]
        nmr = row[1]
        label = row[2]
        try:
            mol = AllChem.AddHs(Chem.MolFromSmiles(smiles))
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            pos=[]
            edge_index=[]
            num_nodes=len(mol.GetAtoms())
            for i in range(num_nodes):
                for j in range(i):
                    edge_index.append([i,j])
                x,y,z =mol.GetConformer().GetAtomPosition(i)
                pos.append([x,y,z])
            pos = torch.tensor(pos)
            dist_i,dist_j = generate_dist_index(mol,valid=True)
            angle_i,angle_j,angle_k = generate_angle_index(mol,valid=True)
            torsion_k,torsion_i,torsion_j,torsion_t = generate_torsion_index(mol,valid=True)
            dist = get_dist(pos,dist_i,dist_j)
            angle = get_angle(pos,angle_i,angle_j,angle_k)
            torsion = get_torsion(pos,torsion_k,torsion_i,torsion_j,torsion_t)

        except:
            print(smiles)
            continue

        if pos.shape[0] > 127:
            count+=1
            print("f{smiles} out 127")
            print(smiles)
            continue


        if (len(dist)==0 or len(angle)==0 or len(torsion)==0):
            print("valid of length")
            print(smiles)
            continue

        try:
            spt = SparseTensor(row=dist_i.long(),col=dist_j.long(),value=dist)
            dist_matrix = spt.to_dense()
        except:
            print("valid cannot covert adj dense")
            print(smiles)
            continue

        smiles_list.append(smiles)
        dist_list.append(dist_matrix)
        pos_list.append(pos)
        nmr_list.append(nmr)
        angle_list.append([angle_i.byte(),angle_j.byte(),angle_k.byte(),angle])
        label_list.append(label)

        torsion_list.append([torsion_k.byte(),torsion_i.byte(),torsion_j.byte(),torsion_t.byte(),torsion])
        count +=1
    print(f"{len(data) - count} is valid")
    base_dir = os.path.dirname(dataset_path)
    base_name = os.path.basename(dataset_path).split(".")[0]
    save_dir  = os.path.join(base_dir,base_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    smiles_save_path = os.path.join(save_dir,"smiles.pth")
    dist_save_path = os.path.join(save_dir,"dist.pth")
    nmr_save_path= os.path.join(save_dir,"nmr.pth")
    angle_save_path = os.path.join(save_dir,"angle.pth")
    torsion_save_path = os.path.join(save_dir,"torsion.pth")
    pos_save_path = os.path.join(save_dir,"pos.pth")
    label_save_path = os.path.join(save_dir,"label.pth")
    torch.save(smiles_list,smiles_save_path)
    torch.save(dist_list,dist_save_path)
    torch.save(nmr_list,nmr_save_path)
    torch.save(angle_list,angle_save_path)
    torch.save(torsion_list,torsion_save_path)
    torch.save(pos_list,pos_save_path)
    torch.save(label_list,label_save_path)

/home/zjh/mr/downstream/dataset/classify/Ames.pth


 10%|█         | 676/6512 [00:42<10:41,  9.10it/s]

Cc1c(N)nc([C@H](CC(N)=O)NC[C@H](N)C(N)=O)nc1C(=O)N[C@H](C(=O)N[C@@H](C)[C@@H](O)[C@@H](C)C(=O)N[C@H](C(=O)NCCc1nc(-c2nc(C(=O)NCCCN[C@H](C)c3ccccc3)cs2)cs1)[C@@H](C)O)[C@@H](O[C@H]1O[C@@H](CO)[C@H](O)[C@@H](O)[C@@H]1O[C@@H]1O[C@H](CO)[C@@H](O)[C@H](OC(N)=O)[C@H]1O)c1c[nH]cn1


 13%|█▎        | 831/6512 [00:53<15:05,  6.27it/s]

Cc1c(N)nc([C@H](CC(N)=O)NC[C@@H](N)C(N)=O)nc1C(=O)N[C@@H](C(=O)N[C@H](C)[C@H](O)[C@H](C)C(=O)N[C@H](C(=O)NCCc1nc(-c2nc(C(=O)NCCCCN=C(N)N)cs2)cs1)[C@@H](C)O)[C@@H](O[C@@H]1O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]1O[C@@H]1O[C@H](CO)[C@@H](O)[C@H](OC(N)=O)[C@H]1O)c1c[nH]cn1


 13%|█▎        | 840/6512 [00:53<09:18, 10.15it/s]

valid of length
ClC(Cl)Cl


 13%|█▎        | 859/6512 [00:54<05:03, 18.62it/s]

valid of length
C=O


 13%|█▎        | 879/6512 [00:56<04:32, 20.67it/s]

valid of length
[O-][Cl+3]([O-])([O-])[O-]


 14%|█▍        | 920/6512 [00:59<16:16,  5.72it/s]

f{smiles} out 127
CC(=C/C=C/C=C(C)/C=C/C=C(\C)C(=O)O[C@@H]1O[C@H](CO[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@H](O)[C@H]1O)/C=C/C=C(/C)C(=O)O[C@@H]1O[C@H](CO[C@@H]2O[C@H](CO)[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@H](O)[C@H]1O


 16%|█▌        | 1051/6512 [01:09<03:10, 28.70it/s]

valid of length
ClC(Br)Br


 17%|█▋        | 1085/6512 [01:11<04:46, 18.97it/s]

COCCOCCO[PH]1(OCCOCCOC)N[PH](OCCOCCOC)(OCCOCCOC)N[PH](OCCOCCOC)(OCCOCCOC)N1


 19%|█▉        | 1246/6512 [01:22<18:34,  4.72it/s]

f{smiles} out 127
CCCCCCCCCCCCCCCCCC(=O)NCCCC[C@H](NC(=O)CC[C@@H](NC(=O)[C@H](C)NC(=O)[C@@H](C)O[C@@H]1[C@H](NC(C)=O)[C@H](O)O[C@H](CO)[C@H]1O)C(N)=O)C(=O)O


 21%|██▏       | 1399/6512 [01:30<07:20, 11.62it/s]

valid of length
ClC(Cl)(Cl)Br


 25%|██▌       | 1642/6512 [01:44<04:48, 16.90it/s]

valid of length
CBr


 26%|██▌       | 1671/6512 [02:23<6:00:06,  4.46s/it]

COc1cc2c(cc1OC)[C@@]13CCN4CC5=CCO[C@@H]6CC(=O)N2[C@H]1[C@H]6[C@@H]5C[C@@H]43


 27%|██▋       | 1739/6512 [02:29<08:33,  9.29it/s]  

Cc1c(N)nc([C@H](CC(N)=O)NC[C@H](N)C(N)=O)nc1C(=O)N[C@H](C(=O)N[C@H](C)[C@@H](O)[C@H](C)C(=O)N[C@H](C(=O)NCCc1nc(-c2nc(C(=O)O)cs2)cs1)[C@@H](C)O)[C@@H](O[C@H]1O[C@@H](CO)[C@H](O)[C@H](O)[C@@H]1O[C@H]1O[C@H](CO)[C@@H](O)[C@H](OC(N)=O)[C@@H]1O)c1c[nH]cn1


 29%|██▊       | 1872/6512 [02:37<06:56, 11.13it/s]

[C-]#[O+]


 31%|███       | 2004/6512 [02:43<03:27, 21.76it/s]

f{smiles} out 127
CCC1OC(=O)C(C)C(OC2CC(C)(OC)C(O)C(C)O2)C(C)C(OC2OC(C)CC(N(C)C)C2O)C(C)(O)CC(C)/C(=N/OCOCCOC)C(C)C(O)C1(C)O


 35%|███▌      | 2284/6512 [03:00<03:34, 19.69it/s]

valid of length
BrC(Br)Br


 38%|███▊      | 2493/6512 [03:15<14:33,  4.60it/s]

f{smiles} out 127
COC1(CCC(C)COC2OC(CO)C(O)C(O)C2O)OC2CC3C4CC=C5CC(OC6OC(CO)C(O)C(OC7OC(CO)C(O)C(O)C7O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C2C1C


 38%|███▊      | 2504/6512 [03:17<12:43,  5.25it/s]

f{smiles} out 127
CC1CCC2(OC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(O)C(OC7OC(CO)C(O)C(O)C7O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C


 40%|████      | 2607/6512 [03:23<02:58, 21.91it/s]

valid of length
FC(F)(Cl)Br


 43%|████▎     | 2785/6512 [03:32<02:32, 24.45it/s]

valid of length
ICI


 48%|████▊     | 3097/6512 [03:51<09:25,  6.04it/s]

f{smiles} out 127
C[C@H]1CC[C@@H]2[C@@H](C)[C@H]3[C@@H](C[C@H]4[C@@H]5CC=C6C[C@@H](O[C@@H]7O[C@H](CO)[C@H](O)[C@H](O[C@@H]8O[C@@H](CO)[C@@H](O)[C@@H](O)[C@H]8O)[C@@H]7O[C@@H]7O[C@@H](C)[C@H](O)[C@@H](O)[C@H]7O)CC[C@]6(C)[C@H]5CC[C@@]43C)N2C1


 48%|████▊     | 3109/6512 [03:53<09:41,  5.85it/s]

Cc1c(N)nc([C@H](CC(N)=O)NC[C@H](N)C(N)=O)nc1C(=O)N[C@H](C(=O)N[C@H](C)[C@@H](O)[C@H](C)C(=O)N[C@H](C(=O)NCCc1nc(-c2nc(C(=O)NCCCNCCCNC(=N)CC3CCC(Cl)CC3)cs2)cs1)[C@H](C)O)[C@@H](O[C@@H]1O[C@@H](CO)[C@H](O)[C@@H](O)[C@H]1O[C@@H]1O[C@H](CO)[C@@H](O)[C@H](OC(N)=O)[C@@H]1O)c1c[nH]cn1
valid of length
FC(Cl)(Cl)Cl


 48%|████▊     | 3141/6512 [03:57<13:24,  4.19it/s]

f{smiles} out 127
C[C@@H]1OC(=O)C[C@H](O)C[C@H](O)CC[C@@H](O)[C@H](O)C[C@H](O)C[C@]2(O)C[C@H](O)[C@H](C(=O)O)[C@@H](C[C@@H](O[C@H]3O[C@H](C)[C@H](O)[C@H](N)[C@@H]3O)/C=C/C=C/C=C/C=C/C=C/C=C/C=C/[C@H](C)[C@@H](O)[C@H]1C)O2


 51%|█████     | 3301/6512 [04:05<02:33, 20.93it/s]

f{smiles} out 127
CC1CCC2C(C)C3C(CC4C5CC=C6CC(OC7OC(CO)C(OC8OC(C)C(O)C(O)C8O)C(O)C7OC7OC(C)C(O)C(O)C7O)CCC6(C)C5CCC43C)N2C1


 53%|█████▎    | 3433/6512 [04:14<02:19, 22.08it/s]

[C-]#N


 54%|█████▍    | 3521/6512 [04:26<20:06,  2.48it/s]

Cc1c(N)nc([C@H](CC(N)=O)NC[C@H](N)C(N)=O)nc1C(=O)N[C@H](C(=O)N[C@@H](C)[C@@H](O)[C@H](C)C(=O)N[C@H](C(=O)NCCc1nc(-c2nc(C(=O)NCCCN(C)CCC(N(C)c3ccc(OCc4ccccc4)c(OCc4ccccc4)c3)N(C)c3ccc(OCc4ccccc4)c(OCc4ccccc4)c3)cs2)cs1)[C@@H](C)O)[C@@H](O[C@@H]1O[C@@H](CO)[C@@H](O)[C@H](O)[C@@H]1O[C@@H]1O[C@H](CO)[C@@H](O)[C@H](OC(N)=O)[C@@H]1O)c1c[nH]cn1


 55%|█████▌    | 3600/6512 [04:30<01:52, 25.79it/s]

valid of length
CCl


 56%|█████▋    | 3671/6512 [04:34<03:13, 14.69it/s]

valid of length
IC(I)I
valid of length
ClCBr


 57%|█████▋    | 3708/6512 [04:35<01:28, 31.83it/s]

valid of length
O=C=S


 57%|█████▋    | 3719/6512 [04:37<07:20,  6.34it/s]

f{smiles} out 127
CCCCCCCCCCCCCCCCCC(=O)NCCCCC(NC(=O)CCC(NC(=O)C(C)NC(=O)C(C)OC(C(C=O)NC(C)=O)C(O)C(O)CO)C(N)=O)C(=O)O


 63%|██████▎   | 4115/6512 [04:59<04:04,  9.79it/s]

Cc1c(N)nc(C(CC(N)=O)NCC(N)C(N)=O)nc1C(=O)NC(C(=O)NC(C)C(O)C(C)C(=O)NC(C(=O)NCCc1nc(-c2nc(C(=O)NCCc3c[nH]cn3)cs2)cs1)C(C)O)C(OC1OC(CO)C(O)C(O)C1OC1OC(CO)C(O)C(OC(N)=O)C1O)c1c[nH]cn1


 65%|██████▌   | 4246/6512 [05:06<01:39, 22.83it/s]

valid of length
N#CCl


 66%|██████▌   | 4299/6512 [05:19<30:27,  1.21it/s]

CC[C@H](C)[C@@H]1NC(=O)[C@@H](CCCN=C(N)N)NC(=O)[C@H](CC(=O)O)NC(=O)[C@@H](CCSC)NC(=O)[C@H](CCCN=C(N)N)NC(=O)CNC(=O)CNC(=O)[C@H](Cc2ccccc2)NC(=O)[C@@H](NC(=O)[C@H](CO)NC(=O)[C@@H](CO)NC(=O)[C@H](CCCN=C(N)N)NC(=O)[C@@H](CCCN=C(N)N)NC(=O)[C@H](CC(C)C)NC(=O)[C@@H](N)CO)CSSC[C@H](C(=O)N[C@@H](CC(N)=O)C(=O)N[C@@H](CO)C(=O)N[C@H](Cc2ccccc2)C(=O)N[C@@H](CCCN=C(N)N)C(=O)N[C@@H](Cc2ccc(O)cc2)C(=O)O)NC(=O)CNC(=O)[C@H](CC(C)C)NC(=O)CNC(=O)[C@H](CO)NC(=O)[C@@H](CCC(N)=O)NC(=O)[C@@H](C)NC(=O)CNC1=O


 67%|██████▋   | 4389/6512 [05:25<01:20, 26.36it/s]

f{smiles} out 127
C[C@@H]1O[C@H](O[C@H]2[C@@H](O[C@@H]3[C@@H](CO)O[C@@H](O[C@H]4CC[C@@]5(C)C(=CC[C@@H]6[C@@H]7C[C@@H]8[C@H]([C@H](C)[C@@H]9CC[C@@H](C)CN98)[C@@]7(C)CC[C@H]65)C4)[C@H](O)[C@H]3O)O[C@@H](C)[C@H](O)[C@H]2O)[C@H](O)[C@H](O)[C@H]1O


 67%|██████▋   | 4392/6512 [05:32<22:14,  1.59it/s]

f{smiles} out 127
C/C=C/C[C@@H](C)[C@@H](O)[C@H]1C(=O)N[C@@H](CC)C(=O)N(C)CC(=O)N(C)[C@@H](CC(C)C)C(=O)N[C@@H](C(C)C)C(=O)N(C)[C@@H](CC(C)C)C(=O)N[C@@H](C)C(=O)N[C@H](C)C(=O)N(C)[C@@H](CC(C)C)C(=O)N(C)[C@@H](CC(C)C)C(=O)N(C)[C@@H](C(C)C)C(=O)N1C


 75%|███████▍  | 4882/6512 [06:08<05:00,  5.43it/s]

f{smiles} out 127
CC1OC(OC2C(OC3C(OC4CCC5(C)C(CCC6(C)C5CC=C5C7CC(C)(C)CC(O)C7(C)CCC56C)C4(C)CO)OC(C(=O)O)C(O)C3O)OC(CO)C(O)C2O)C(O)C(O)C1O


 78%|███████▊  | 5101/6512 [06:22<00:49, 28.32it/s]

valid of length
FC(F)(F)I


 79%|███████▊  | 5125/6512 [06:25<03:46,  6.13it/s]

f{smiles} out 127
COC1=C(OC)C(=O)C(C/C=C(\C)CC/C=C(\C)CC/C=C(\C)CC/C=C(\C)CC/C=C(\C)CC/C=C(\C)CC/C=C(\C)CC/C=C(\C)CC/C=C(\C)CCC=C(C)C)=C(C)C1=O


 80%|████████  | 5218/6512 [06:34<06:02,  3.57it/s]

f{smiles} out 127
Cc1c2oc3c(C)ccc(C(=O)N[C@@H]4C(=O)N[C@@H](C(C)C)C(=O)N5CCC[C@H]5C(=O)N(C)CC(=O)N(C)[C@H](C(C)C)C(=O)O[C@@H]4C)c3nc-2c(C(=O)N[C@@H]2C(=O)N[C@@H](C(C)C)C(=O)N3CCC[C@H]3C(=O)N(C)CC(=O)N(C)[C@@H](C(C)C)C(=O)O[C@H]2C)c(N)c1=O


 81%|████████  | 5259/6512 [06:36<01:19, 15.84it/s]

O=C(OCC1OC(OC(=O)c2cc(O)c(OC(=O)c3cc(O)c(O)c(O)c3)c(O)c2)C(OC(=O)c2cc(O)c(O)c(O)c2)C(OC(=O)c2cc(O)c(O)c(O)c2)C1OC(=O)c1cc(O)c(O)c(O)c1)c1cc(O)c(O)c(O)c1


 82%|████████▏ | 5342/6512 [06:40<00:50, 23.17it/s]

valid of length
ClC(Cl)(Cl)Cl


 83%|████████▎ | 5416/6512 [06:48<04:12,  4.34it/s]

f{smiles} out 127
COC1(CCC(C)COC2OC(CO)C(O)C(O)C2O)OC2CC3C4CC=C5CC(OC6OC(CO)C(OC7OC(C)C(O)C(O)C7O)C(O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C2C1C


 83%|████████▎ | 5425/6512 [06:49<02:05,  8.66it/s]

valid of length
ClCCl


 84%|████████▍ | 5488/6512 [06:54<01:02, 16.26it/s]

valid of length
BrCBr


 85%|████████▍ | 5512/6512 [06:55<00:45, 22.14it/s]

Cc1c(N)nc(C(CNCC(N)C(N)=O)C(N)=O)nc1C(=O)NC(C(=O)NC(C)C(O)C(C)C(=O)NC(C(=O)NCCc1nc(-c2nc(C(=O)NCCCN(C)CCCNC(=N)CC3CCC(Cl)CC3)cs2)cs1)C(C)O)C(OC1OC(CO)C(O)C(O)C1OC1OC(CO)C(O)C(OC(N)=O)C1O)c1c[nH]cn1


 86%|████████▋ | 5625/6512 [07:05<03:19,  4.44it/s]

f{smiles} out 127
C[C@@H]1[C@H]2C3=CC[C@@H]4[C@@]5(C)C[C@@H](O)[C@H](O)[C@@](C)(CO)[C@@H]5CC[C@@]4(C)[C@]3(C)CC[C@@]2(C(=O)O[C@@H]2O[C@H](CO[C@@H]3O[C@H](CO)[C@@H](O[C@@H]4O[C@@H](C)[C@H](O)[C@@H](O)[C@H]4O)[C@H](O)[C@H]3O)[C@@H](O)[C@H](O)[C@H]2O)CC[C@H]1C


 90%|█████████ | 5867/6512 [07:20<01:48,  5.95it/s]

f{smiles} out 127
OCC1OC(O)C(O)C(OP2(OC3C(O)C(O)OC(CO)C3O)=NP(OC3C(O)C(O)OC(CO)C3O)(OC3C(O)C(O)OC(CO)C3O)=NP(OC3C(O)C(O)OC(CO)C3O)(OC3C(O)C(O)OC(CO)C3O)=N2)C1O


 92%|█████████▏| 5994/6512 [07:29<01:28,  5.86it/s]

f{smiles} out 127
COC(C(=O)C(O)C(C)O)C1Cc2cc3cc(OC4CC(OC5CC(O)C(OC)C(C)O5)C(OC(C)=O)C(C)O4)c(C)c(O)c3c(O)c2C(=O)C1OC1CC(OC2CC(OC3CC(C)(O)C(OC(C)=O)C(C)O3)C(O)C(C)O2)C(O)C(C)O1


 93%|█████████▎| 6043/6512 [07:37<02:01,  3.86it/s]

f{smiles} out 127
CC1CCC2(OC1)OC1CC3C4CC=C5CC(OC6OC(CO)C(OC7OC(C)C(O)C(O)C7O)C(O)C6OC6OC(C)C(O)C(O)C6O)CCC5(C)C4CCC3(C)C1C2C


 94%|█████████▍| 6153/6512 [07:46<00:49,  7.32it/s]

Cc1cc(=O)oc2ccc3c(c12)C1(C)OOC1(C)O3


 95%|█████████▌| 6209/6512 [07:50<00:19, 15.69it/s]

valid of length
S=C=S


 96%|█████████▌| 6230/6512 [07:53<00:52,  5.33it/s]

ClC1=C(Cl)[C@]2(Cl)[C@H]3[C@@H]4C[C@@H]([C@@H]5O[C@H]45)[C@H]3[C@@]1(Cl)C2(Cl)Cl


 96%|█████████▋| 6282/6512 [07:58<00:30,  7.53it/s]

N/N=C1\N=NC=C2C=CC=C[C@@H]21


 97%|█████████▋| 6303/6512 [08:00<00:14, 14.19it/s]

CCCCCCCCOP(=O)(OCCCCCCCC)OCCCCCCCC


 98%|█████████▊| 6397/6512 [08:05<00:03, 33.53it/s]

valid of length
Cl[Si](Cl)(Cl)Cl


 99%|█████████▉| 6435/6512 [08:07<00:04, 19.11it/s]

ClCl


100%|██████████| 6512/6512 [08:11<00:00, 13.24it/s]


39 is valid
/home/zjh/mr/downstream/dataset/classify/BBBP.pth


  1%|          | 11/2039 [00:01<04:14,  7.98it/s]

valid of length
C(Cl)Cl


  8%|▊         | 169/2039 [00:20<13:29,  2.31it/s]

f{smiles} out 127
CO[C@@H]([C@@H]1Cc2cc3cc(O[C@H]4C[C@@H](O[C@H]5C[C@@H](O)[C@H](O)[C@@H](C)O5)[C@H](O)[C@@H](C)O4)c(C)c(O)c3c(O)c2C(=O)[C@H]1O[C@H]6C[C@@H](O[C@H]7C[C@@H](O[C@H]8C[C@](C)(O)[C@H](O)[C@@H](C)O8)[C@H](O)[C@@H](C)O7)[C@H](O)[C@@H](C)O6)C(=O)[C@@H](O)[C@@H](C)O


 10%|▉         | 201/2039 [00:32<22:54,  1.34it/s]

f{smiles} out 127
CCC(C)C(N)C1=NC(CS1)C(=O)N[C@@H](CC(C)C)C(=O)N[C@H](CCC(O)=O)C(=O)N[C@@H]([C@@H](C)CC)C(=O)NCCCC[C@@H]2NC(=O)[C@H](CC(N)=O)NC(=O)[C@@H](CC(O)=O)NC(=O)[C@H](Cc3[nH]cnc3)NC(=O)[C@@H](Cc4ccccc4)NC(=O)[C@@H](NC(=O)[C@@H](CCCN)NC2=O)[C@@H](C)CC


 17%|█▋        | 348/2039 [00:49<07:32,  3.74it/s]

f{smiles} out 127
C[C@H]1O[C@@H](O[C@@H]\2C[C@@H]3O[C@](O)(C[C@@H](O)C[C@@H](O)[C@H](O)CC[C@@H](O)C[C@@H](O)CC(=O)O[C@@H](C)[C@H](C)[C@H](O)[C@@H](C)\C=C/C=C\C=C/C=C\C=C/C=C\C=C2)C[C@H](O)[C@H]3C(O)=O)[C@@H](O)[C@@H](N)[C@@H]1O


 18%|█▊        | 371/2039 [00:59<31:12,  1.12s/it]

f{smiles} out 127
CN[C@H](CC(C)C)C(=O)NC1[C@H](O)c2ccc(Oc3cc4cc(Oc5ccc(cc5Cl)[C@@H](O)[C@@H]6NC(=O)[C@H](NC(=O)[C@@H]4NC(=O)[C@H](CC(N)=O)NC1=O)c7ccc(O)c(c7)c8c(O)cc(O)cc8[C@@H](NC6=O)C(O)=O)c3O[C@@H]9O[C@H](CO)[C@@H](O)[C@H](O)[C@H]9O[C@H]%10C[C@](C)(N)[C@H](O)[C@H](C)O%10)c(Cl)c2


 21%|██        | 425/2039 [01:08<10:21,  2.60it/s]

f{smiles} out 127
CC(C)[C@@H]1NC(=O)[C@H](C)OC(=O)C(NC(=O)[C@H](OC(=O)[C@@H](NC(=O)[C@H](C)OC(=O)[C@H](NC(=O)[C@H](OC(=O)[C@@H](NC(=O)[C@H](C)OC(=O)[C@H](NC(=O)[C@H](OC1=O)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C)C(C)C


 21%|██▏       | 435/2039 [01:08<03:22,  7.92it/s]

valid of length
ClCCl


 28%|██▊       | 562/2039 [01:19<01:30, 16.37it/s]

valid of length
ClC(Cl)Cl


 39%|███▉      | 800/2039 [01:53<20:17,  1.02it/s]

f{smiles} out 127
CCC1C(=O)N(CC(=O)N(C(C(=O)NC(C(=O)N(C(C(=O)NC(C(=O)NC(C(=O)N(C(C(=O)N(C(C(=O)N(C(C(=O)N(C(C(=O)N1)C(C(C)CC=CC)O)C)C(C)C)C)CC(C)C)C)CC(C)C)C)C)C)CC(C)C)C)C(C)C)CC(C)C)C)C


 39%|███▉      | 802/2039 [01:56<21:16,  1.03s/it]

f{smiles} out 127
CC(C)C1NC(=O)C(NC(=O)c2ccc(C)c3OC4=C(C)C(=O)C(=C(C(=O)NC5C(C)OC(=O)C(C(C)C)N(C)C(=O)CN(C)C(=O)C6CCCN6C(=O)C(NC5=O)C(C)C)C4=Nc23)N)C(C)OC(=O)C(C(C)C)N(C)C(=O)CN(C)C(=O)C7CCCN7C1=O


 40%|███▉      | 811/2039 [02:01<18:59,  1.08it/s]

f{smiles} out 127
CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]3O[C@H](C)C[C@@H]([C@H]3O)N(C)C)[C@](C)(O)C[C@@H](C)[C@@H]4N[C@@H](COCCOC)O[C@H]([C@H]4C)[C@]1(C)O


 40%|███▉      | 813/2039 [02:04<27:14,  1.33s/it]

f{smiles} out 127
CCC(C(=O)NC\C=C\C=C(/C)C(OC)C(C)C1OC(\C=C\C=C\C=C(/C)C(=O)C2=C(O)N(C)C=CC2=O)C(O)C1O)C3(O)OC(\C=C\C=C\C)C(C)(C)C(OC4OC(C)C(OC5OC(C)C(OC)C(O)C5OC)C(OC)C4O)C3O


 40%|████      | 816/2039 [02:08<29:28,  1.45s/it]

f{smiles} out 127
CCC(C)[C@H]1O[C@]2(CC[C@@H]1C)CC3C[C@@H](C\C=C(C)\[C@@H](O[C@H]4C[C@H](OC)[C@@H](OC5C[C@H](OC)[C@@H](O)[C@H](C)O5)[C@H](C)O4)[C@@H](C)/C=C/C=C/6CO[C@@H]7[C@H](O)C(=C[C@@H](C(=O)O3)[C@]67O)C)O2.CO[C@H]8CC(O[C@@H](C)[C@@H]8O)O[C@H]9[C@H](C)O[C@H](C[C@@H]9OC)O[C@H]/%10[C@@H](C)/C=C/C=C/%11CO[C@@H]%12[C@H](O)C(=C[C@@H](C(=O)OC%13C[C@@H](C\C=C%10C)O[C@@]%14(CC[C@H](C)[C@H](O%14)C(C)C)C%13)[C@]%11%12O)C


 40%|████      | 820/2039 [02:10<12:58,  1.57it/s]

f{smiles} out 127
CN(C)[C@H]1[C@@H]2C[C@H]3C(=C(O)c4c(O)cccc4[C@@]3(C)O)C(=O)[C@]2(O)C(=O)\C(=C(/O)NCN(C)CCN(C)CNC(/O)=C/5C(=O)[C@H]([C@@H]6C[C@H]7C(=C(O)c8c(O)cccc8[C@@]7(C)O)C(=O)[C@]6(O)C5=O)N(C)C)C1=O


 41%|████      | 834/2039 [02:16<11:40,  1.72it/s]

f{smiles} out 127
CC(C)CC1C(=O)NC(C(=O)N2CCCC2C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N3CCCC3C(=O)NC(C(=O)NC(C(=O)N1)CCCN)C(C)C)CC4=CC=CC=C4)CC(C)C)CCCN)C(C)C)CC5=CC=CC=C5


 44%|████▍     | 906/2039 [02:46<17:01,  1.11it/s]

f{smiles} out 127
CC[C@@]1(O)C[C@H](O[C@H]2C[C@@H]([C@H](O[C@H]3C[C@@H]4O[C@H]5CC(=O)[C@H](C)O[C@H]5O[C@@H]4[C@H](C)O3)[C@H](C)O2)N(C)C)c6c(O)c7C(=O)c8c(O)cccc8C(=O)c7c(O)c6[C@H]1O[C@H]9C[C@@H]([C@H](O)[C@H](C)O9)N(C)C


 45%|████▍     | 908/2039 [02:48<17:40,  1.07it/s]

f{smiles} out 127
CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]3O[C@H](C)C[C@@H]([C@H]3O)N(C)C)[C@](C)(O)C[C@@H](C)\C(=N\OCOCCOC)[C@H](C)[C@@H](O)[C@]1(C)O


 45%|████▍     | 915/2039 [02:50<08:35,  2.18it/s]

f{smiles} out 127
COC1C(O)CC(=O)OC(C)C\C=C\C=C\C(OC2CCC(C(C)O2)N(C)C)C(C)CC(CC=O)C1OC3OC(C)C(OC4CC(C)(O)C(O)C(C)O4)C(C3O)N(C)C


 45%|████▌     | 923/2039 [02:59<25:50,  1.39s/it]

f{smiles} out 127
CCCCCCCCCC(=O)N[C@@H]1[C@@H](O)[C@H](O)[C@@H](CO)O[C@H]1Oc2c3Oc4ccc(C[C@H]5NC(=O)[C@H](N)c6ccc(O)c(Oc7cc(O)cc(c7)[C@H](NC5=O)C(=O)N[C@H]8C(=O)N[C@H]9C(=O)N[C@@H]([C@H](O[C@@H]%10O[C@H](CO)[C@@H](O)[C@H](O)[C@H]%10NC(C)=O)c%11ccc(Oc2cc8c3)c(Cl)c%11)C(=O)N[C@@H](C(O)=O)c%12cc(O)cc(O[C@H]%13O[C@H](CO)[C@@H](O)[C@H](O)[C@@H]%13O)c%12c%14cc9ccc%14O)c6)cc4Cl


 51%|█████     | 1044/2039 [03:17<01:06, 14.88it/s]

valid of length
C(Br)(Br)Br


 52%|█████▏    | 1064/2039 [03:19<01:15, 12.87it/s]

OC1OC(COCCOC(O)C(Cl)(Cl)Cl)C(OC2OC(COCCOCCOC(O)C(Cl)(Cl)Cl)CC(OC(O)C(Cl)(Cl)Cl)C2O)C(OCCOCCOC(O)C(Cl)(Cl)Cl)C1O


 53%|█████▎    | 1072/2039 [03:20<01:15, 12.73it/s]

valid of length
C(Cl)(Cl)Cl


 98%|█████████▊| 1989/2039 [05:17<01:03,  1.26s/it]

[C@@H]3(C1=CC=C(Cl)C=C1)[C@H]2CC[C@@H](C2)C34CCC(=N4)N5CCOCC5


100%|█████████▉| 2036/2039 [05:21<00:00, 15.31it/s]

valid of length
[N+](=[N-])=O


100%|██████████| 2039/2039 [05:22<00:00,  6.33it/s]


8 is valid
/home/zjh/mr/downstream/dataset/classify/bace.pth


  5%|▍         | 75/1513 [00:28<13:16,  1.81it/s]

f{smiles} out 127
O(C(C)(C)C)C(=O)N[C@@H](C(C)C)C(=O)N([C@@H](CCC(=O)N)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](Cc1ccccc1)C(=O)N1CCC[C@@H]1C(OC)=O)C)C)Cc1ccccc1)C


  5%|▌         | 83/1513 [00:33<21:14,  1.12it/s]

f{smiles} out 127
S(=O)(=O)(N(C)c1cc(cc(c1)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](Cc1ccccc1)C(=O)N1CCC[C@H]1C(OC)=O)C)C)Cc1ccccc1)C(=O)N[C@H](C)c1ccccc1)C


  6%|▌         | 89/1513 [00:36<15:49,  1.50it/s]

f{smiles} out 127
O=C(N)CC[C@H](N(C(=O)[C@@H](NC(=O)[C@@H](O)C)C(C)C)C)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](CC1CCCCC1)C(=O)[O-])C)C)CC1CCCCC1


  7%|▋         | 104/1513 [00:42<19:41,  1.19it/s]

f{smiles} out 127
S(=O)(=O)(N[C@@H]1C[C@H](C[C@@H](C1)C(=O)N[C@H]([C@@H](O)CC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C(=O)N([C@H](CC1CCCCC1)C(=O)N1CCC[C@H]1C(OC)=O)C)C)CC1CCCCC1)C(=O)N[C@H](C)C1CCCCC1)C


  7%|▋         | 113/1513 [00:46<15:48,  1.48it/s]

f{smiles} out 127
O(C(=O)[C@H]1N(CCC1)C(=O)[C@H](N(C(=O)[C@@H](NC(=O)[C@@H](NC(=O)C[C@H](O)[C@@H](NC(=O)[C@@H](N(C(=O)[C@@H](NC(=O)[C@@H](O)C)C(C)C)C)CCC(=O)N)CC1CCCCC1)CC(C)C)C)C)CC1CCCCC1)C


 13%|█▎        | 203/1513 [01:00<01:05, 20.08it/s]

f{smiles} out 127
OC(C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C(NC(=O)C([NH3+])Cc1c2c([nH]c1)cccc2)Cc1c2c([nH]c1)cccc2)CO)CCC(=O)[O-])C(C)C)CC(=O)N)CC(C)C)CC(C(=O)NC(C(=O)NC(CCC(=O)[O-])C(=O)NC(Cc1ccccc1)C(=O)[O-])C)C


 14%|█▎        | 206/1513 [01:03<08:23,  2.60it/s]

f{smiles} out 127
OC(C(NC(=O)C(NC(=O)C(NC(=O)C([NH3+])CCC(=O)[O-])CC(C)C)CC(=O)[O-])CC(C)C)CC(C(=O)NC(C(C)C)C(=O)NC(CCC(=O)[O-])C(=O)NC(Cc1ccccc1)C(=O)[O-])C


100%|██████████| 1513/1513 [06:02<00:00,  4.17it/s]


0 is valid
