In [1]:
import csv
import json
from rdkit import Chem
from collections import Counter
import matplotlib.pyplot as plt
from rdkit.Chem import Draw
import builtins

In [2]:
with open ('/home/test/BiDe/sele/all_acetylene.json', 'r') as f:
    all_ace_smiles_dict = json.load(f)
all_ace_smiles_list = list(all_ace_smiles_dict.values())
all_ace_keys_list = list(all_ace_smiles_dict.keys())
with open ('/home/test/BiDe/sele/all_NH2.json', 'r') as f:
    all_N3_smiles_dict = json.load(f)
all_N3_smiles_list = list(all_N3_smiles_dict.values())
all_N3_keys_list = list(all_N3_smiles_dict.keys())

In [3]:
with open ('/home/test/BiDe/sele/N3.csv', 'r') as f:
    N3_list = f.read().splitlines()
with open ('/home/test/BiDe/sele/acytylene.csv', 'r') as f:
    acytylene_list = f.read().splitlines()

In [4]:
def smiles_to_image_grid(smiles_list, img_size=(200, 200), mols_per_row=4, kekulize=True):
    mols = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]
    if kekulize:
        for mol in mols:
            Chem.Kekulize(mol)
    img = Draw.MolsToGridImage(mols, molsPerRow=mols_per_row, subImgSize=img_size)
    return img

In [5]:
def save_list_to_csv(filename, data_list):
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)
        for item in data_list:
            csv_writer.writerow([item])

In [6]:
def convert_primary_amines_to_diazonium(smiles):
    molecule = Chem.MolFromSmiles(smiles)
    primary_amine_pattern = Chem.MolFromSmarts('[NX3;H2]')  
    diazonium_pattern = Chem.MolFromSmiles('[N]=[N+]=[N-]')
    wrong_smiles = Chem.MolFromSmiles('C([N]=[N+]=[N-])[N]=[N+]=[N-]')
    wrong_smiles_2 = Chem.MolFromSmiles('C(=O)[N]=[N+]=[N-]')
    true_smiles = Chem.MolFromSmiles('C(N)N')
    true_smiles_2 = Chem.MolFromSmiles('C(=O)N')
    if molecule.HasSubstructMatch(primary_amine_pattern):
        modified_molecule = Chem.ReplaceSubstructs(molecule, primary_amine_pattern, diazonium_pattern, replaceAll=True)[0]
        if modified_molecule.HasSubstructMatch(wrong_smiles):
            modified_molecule = Chem.ReplaceSubstructs(modified_molecule, wrong_smiles, true_smiles, replaceAll=True)[0]
        elif modified_molecule.HasSubstructMatch(wrong_smiles_2):
            modified_molecule = Chem.ReplaceSubstructs(modified_molecule, wrong_smiles_2, true_smiles_2, replaceAll=True)[0]
        new_smiles = Chem.MolToSmiles(modified_molecule, isomericSmiles=True)
        return new_smiles
    elif '[NH2+]' in smiles:
        try:
            boc_protected_amine = Chem.MolFromSmiles(smiles)
            reaction = BocRemoval(bocamine=boc_protected_amine)
            molecule = reaction.get_product()
            modified_molecule = Chem.ReplaceSubstructs(molecule, primary_amine_pattern, diazonium_pattern, replaceAll=True)[0]
            new_smiles = Chem.MolToSmiles(modified_molecule, isomericSmiles=True)
            return new_smiles
        except:
            # print(smiles,"无法脱Boc")
            return None
    else:
        # print( smiles,"未找到一级胺")
        return None

In [7]:
receptor_pdb_list=['7ENE','7L0D','7L10','7M04','7M90','7RFS','7RVT','7S6X','6LU7']

In [8]:
docking_results_NH2 = {}
docking_results_ace = {}
docking_results_all = []

In [9]:
for i in receptor_pdb_list:
    docking_results_NH2[i] = []
    docking_results_ace[i] = []
    with open (f'./docking_result_for_NH2/{i}_docking_result.txt', 'r') as f:
        docking_results_values_NH2 = f.read().splitlines()
        docking_results_NH2[i] = docking_results_values_NH2
        docking_results_all = docking_results_all + docking_results_values_NH2
    with open (f'./docking_result_for_acetylene/{i}_docking_result.txt', 'r') as f:
        docking_results_values_ace = f.read().splitlines()
        docking_results_ace[i] = docking_results_values_ace
        docking_results_all = docking_results_all + docking_results_values_ace

In [10]:
top45_NH2_list = []
top45_acylyene_list = []
for i in docking_results_NH2:
    sorted_data = sorted(enumerate(docking_results_NH2[i]), key=lambda x: x[1], reverse=True)
    top_ten_indices = [index for index, value in sorted_data[:5]]
    for j in top_ten_indices:
        top45_NH2_list.append(N3_list[int(all_N3_keys_list[j].split('_')[0])])
        top45_acylyene_list.append(acytylene_list[int(all_N3_keys_list[j].split('_')[1])])
for i in docking_results_ace:
    sorted_data = sorted(enumerate(docking_results_ace[i]), key=lambda x: x[1], reverse=True)
    top_ten_indices = [index for index, value in sorted_data[:5]]
    for j in top_ten_indices:
        top45_NH2_list.append(N3_list[int(all_ace_keys_list[j].split('_')[0])])
        top45_acylyene_list.append(acytylene_list[int(all_ace_keys_list[j].split('_')[1])])

In [11]:
element_counts_NH2 = Counter(top45_NH2_list)
element_counts_acylyene = Counter(top45_acylyene_list)
print(element_counts_NH2)
print(element_counts_acylyene)
# Counter({'[N-]=[N+]=[N]c1cccc2c1[nH]c1ccccc12': 6, '[N-]=[N+]=[N][C@@H]1CCc2[nH]c3ccccc3c2C1': 5, '[N-]=[N+]=[N]c1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1cc(F)c2ncccc2c1': 4, '[N-]=[N+]=[N]c1ccc2c(c1)NC(=O)CC2': 4, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 3, '[N-]=[N+]=[N]c1ccc2cc(-c3ccccc3)c(=O)oc2c1': 3, '[N-]=[N+]=[N]c1nnc(Cc2ccc(F)cc2)o1': 3, '[N-]=[N+]=[N][C@@H]1CCc2ccc(F)cc21': 3, '[N-]=[N+]=[N]c1ccc2cccc(O)c2c1': 3, '[N-]=[N+]=[N]c1cc2ccccc2cc1O': 3, '[N-]=[N+]=[N]CCC(O)c1ccccc1': 2, '[N-]=[N+]=[N]C1CC2(CNC(=O)C2)C1': 2, '[N-]=[N+]=[N]CCC(=O)N1Cc2ccccc2C#Cc2ccccc21': 2, '[N-]=[N+]=[N]CCC1CCc2ccc3c(c21)CCO3': 2, '[N-]=[N+]=[N][C@H]1CCc2ccc(F)cc21': 2, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NC(=O)c2ccccc2)cc1': 1, '[N-]=[N+]=[N]c1nc2c(N3CCOCC3)cccc2s1': 1, '[N-]=[N+]=[N]CCc1c[nH]c2cc(F)ccc12': 1, '[N-]=[N+]=[N]c1nc2ccc(OC(F)(F)F)cc2s1': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)[N-]c2cnc3ccccc3n2)cc1': 1, '[N-]=[N+]=[N]c1nc2cc(C(F)(F)F)ccn2n1': 1, '[N-]=[N+]=[N]C1CCCc2cc(O)ccc21': 1, '[N-]=[N+]=[N]c1ccc2c(c1)CCC(=O)N2': 1, 'CC(=O)Nc1ccc(Cc2ccc([N]=[N+]=[N-])cc2)cc1': 1, '[N-]=[N+]=[N]c1ccc(N2CCN(C(=O)c3ccccc3F)CC2)cc1': 1, '[N-]=[N+]=[N]c1ccccc1Cl': 1, 'N#Cc1c[nH]c2ccc([N]=[N+]=[N-])cc12': 1, '[N-]=[N+]=[N]c1cccc2c(Cl)cccc12': 1, 'Cc1ccc2[nH]c([N]=[N+]=[N-])nc2c1': 1, '[N-]=[N+]=[N]CCNC(=O)Cc1cccc2ccccc12': 1, '[N-]=[N+]=[N]c1ccc(CCc2ccccc2)cc1': 1, '[N-]=[N+]=[N]c1nc(-c2ccc(C(F)(F)F)cc2)cs1': 1, 'Cc1ccc2nc(-c3ccc([N]=[N+]=[N-])cc3O)oc2c1': 1, '[N-]=[N+]=[N]c1ccc2[nH]c(=O)c(=O)[nH]c2c1': 1, '[N-]=[N+]=[N]c1ccc2c3c(cccc13)C(=O)OC2=O': 1, '[N-]=[N+]=[N]c1c[nH]c2ncnc(Cl)c12': 1, '[N-]=[N+]=[N]CC1CN(C(c2ccccc2)c2ccccc2)C1': 1, '[N-]=[N+]=[N]c1n[nH]c2ccc(Cc3cc(F)cc(F)c3)cc12': 1, '[N-]=[N+]=[N]C1CCN(C(=O)OCc2ccccc2)C1': 1, '[N-]=[N+]=[N]c1n[nH]c2cc(C(F)(F)F)ccc12': 1, '[N-]=[N+]=[N]c1nc(-c2cccc(C(F)(F)F)c2)n[nH]1': 1, '[N-]=[N+]=[N]C(Cc1ccc(-c2ccccc2)cc1)C(=O)O': 1, '[N-]=[N+]=[N]c1ccc2cc(C(=O)O)cnc2c1': 1, '[N-]=[N+]=[N]c1ccn(Cc2ccccc2)n1': 1, '[N-]=[N+]=[N]c1ccc(-n2c(=O)[nH]c3cc(F)c(F)cc3c2=O)cc1': 1, 'Cc1nc(C[N]=[N+]=[N-])nc2c1CCCC2': 1, 'CNc1cc2[nH]c(=O)n(-c3ccc([N]=[N+]=[N-])cc3)c(=O)c2cc1F': 1, '[N-]=[N+]=[N]c1cccc2c1C=CCO2': 1, 'Cc1ccc(-c2cccc([N]=[N+]=[N-])c2)cc1': 1, '[N-]=[N+]=[N]c1ccn(-c2c(F)cccc2F)n1': 1, '[N-]=[N+]=[N]c1nnc(-c2ccccc2)o1': 1, '[N-]=[N+]=[N]c1c(F)ccc2cnccc12': 1, '[N-]=[N+]=[N]c1cccc2cc(C(=O)O)oc12': 1, 'Cc1cc([N]=[N+]=[N-])n(-c2ccccc2)n1': 1})
# Counter({'C#Cc1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 28, 'C#Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 19, 'C#Cc1cnc(N)c(-c2ccccc2)n1': 8, 'C#Cc1ccc2ccccc2c1': 5, 'C#Cc1ccc(-c2ccc(O)cc2)cc1': 2, 'C#CCON1C(=O)c2ccccc2C1=O': 2, 'C#CCN[C@@H]1CCc2ccccc21': 2, 'C#Cc1ccc(C#Cc2ccccc2)cc1': 2, 'C#Cc1cccc2nccn12': 2, 'C#CCCNC(=O)c1ccc(N)c(N)c1': 2, 'C#Cc1ccccc1C(F)(F)F': 2, 'C#CCn1c2ccccc2c2ccccc21': 1, 'C#Cc1cccc(C(=O)O)c1': 1, 'C#Cc1cnc2[nH]ncc2c1': 1, 'C#Cc1ccc2[nH]nc(C)c2c1': 1, 'C#Cc1cc(F)c(F)cc1F': 1, 'C#Cc1cccc2cn[nH]c12': 1, 'C#Cc1cccc(=O)[nH]1.O=C(O)C(F)(F)F': 1, 'C#Cc1cccc(C(N)=O)c1': 1, 'C#CCN1C(=O)c2ccccc2C1=O': 1, 'C#CCOc1ccccc1C=O': 1, 'C#CC(=O)c1ccccc1Cl': 1, 'C#CC(N)Cc1cccc(F)c1.Cl': 1, 'C#CCN[C@@H]1CCc2ccccc21.CS(=O)(=O)O': 1, 'C#CCOC(=O)c1ccccc1': 1, 'C#Cc1c[nH]c2ccc([N+](=O)[O-])cc12': 1, 'C#Cc1cccc2ccccc12': 1})

Counter({'[N-]=[N+]=[N]c1cccc2c1[nH]c1ccccc12': 6, '[N-]=[N+]=[N][C@@H]1CCc2[nH]c3ccccc3c2C1': 5, '[N-]=[N+]=[N]c1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1cc(F)c2ncccc2c1': 4, '[N-]=[N+]=[N]c1ccc2c(c1)NC(=O)CC2': 4, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 3, '[N-]=[N+]=[N]c1ccc2cc(-c3ccccc3)c(=O)oc2c1': 3, '[N-]=[N+]=[N]c1nnc(Cc2ccc(F)cc2)o1': 3, '[N-]=[N+]=[N][C@@H]1CCc2ccc(F)cc21': 3, '[N-]=[N+]=[N]c1ccc2cccc(O)c2c1': 3, '[N-]=[N+]=[N]c1cc2ccccc2cc1O': 3, '[N-]=[N+]=[N]CCC(O)c1ccccc1': 2, '[N-]=[N+]=[N]C1CC2(CNC(=O)C2)C1': 2, '[N-]=[N+]=[N]CCC(=O)N1Cc2ccccc2C#Cc2ccccc21': 2, '[N-]=[N+]=[N]CCC1CCc2ccc3c(c21)CCO3': 2, '[N-]=[N+]=[N][C@H]1CCc2ccc(F)cc21': 2, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NC(=O)c2ccccc2)cc1': 1, '[N-]=[N+]=[N]c1nc2c(N3CCOCC3)cccc2s1': 1, '[N-]=[N+]=[N]CCc1c[nH]c2cc(F)ccc12': 1, '[N-]=[N+]=[N]c1nc2ccc(OC(F)(F)F)cc2s1': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)[N-]c2cnc3ccccc3n2)cc1': 1, '[N-]=[N+]=[N]c1nc2cc(C(F)(F)F)ccn2n1': 1, '[N-]=[N+]=[N]C1CCCc2cc(O)ccc21': 1,

In [12]:
docking_results_values_NH2 = {}
for i in receptor_pdb_list:
    docking_results_values_NH2[i] = []
    for j in range(0,len(docking_results_NH2[i]),3):
        sum_result = builtins.sum(float(k) for k in docking_results_NH2[i][j:j+3])
        docking_results_values_NH2[i].append([int(all_N3_keys_list[j].split('_')[0]),sum_result])


In [13]:
all_top10_NH2_list = []
for i in docking_results_values_NH2:
    sorted_data_NH2 = sorted(docking_results_values_NH2[i], key=lambda x: x[1])
    top_ten_indices_NH2 = [index for index, value in sorted_data_NH2[:10]]
    for j in top_ten_indices_NH2:
        all_top10_NH2_list.append(N3_list[j])

In [14]:
element_counts_all_top10_NH2 = Counter(all_top10_NH2_list)
print(element_counts_all_top10_NH2)
# {'[N-]=[N+]=[N][C@H](CO)c1ccc2cc3ccccc3cc2c1': 8, '[N-]=[N+]=[N]c1cccc2c1[nH]c1ccccc12': 6, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 5, '[N-]=[N+]=[N]C12CCCC(NC(=O)OCc3ccccc3)(CC1)C2': 5, '[N-]=[N+]=[N]c1ccc2cc(-c3nc4ccccc4[nH]3)c(=O)oc2c1': 5, '[N-]=[N+]=[N]Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1ccc2c(=O)[nH][nH]c(=O)c2c1': 3, '[N-]=[N+]=[N]c1ccc(N2CCN(C(=O)c3ccccc3F)CC2)cc1': 3, '[N-]=[N+]=[N]c1ccc(CNc2ccccc2)cc1': 2, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)[N-]c2cnc3ccccc3n2)cc1': 2, '[N-]=[N+]=[N]c1ccc2cc(-c3ccccc3)c(=O)oc2c1': 2, '[N-]=[N+]=[N]c1ccc2c(CC(=O)O)cc(=O)oc2c1': 2, '[N-]=[N+]=[N]c1ccc(C(=O)Nc2ccccc2Cl)cc1': 2, '[N-]=[N+]=[N]c1ccn(Cc2ccc(Cl)c(Cl)c2)n1': 2, '[N-]=[N+]=[N]c1n[nH]c2ccc([N+](=O)[O-])cc12': 2, '[N-]=[N+]=[N]c1ccc(-n2c(=O)[nH]c3cc(F)c(F)cc3c2=O)cc1': 2, '[N-]=[N+]=[N]c1ccc(N2CCN(c3ccc(O)cc3)CC2)cc1': 2, 'Cc1ccc2nc(-c3ccc([N]=[N+]=[N-])cc3O)oc2c1': 2, '[N-]=[N+]=[N][C@H]1CC[C@H](NC2CC2c2ccccc2)CC1': 1, '[N-]=[N+]=[N]c1n[nH]c2ccc(Cc3cc(F)cc(F)c3)cc12': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NC(=O)c2ccccc2)cc1': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NCc2ccccc2)cc1': 1, '[N-]=[N+]=[N]C1CCN(C(=O)OCc2ccccc2)C1': 1, '[N-]=[N+]=[N]c1nc(-c2cccc(C(F)(F)F)c2)n[nH]1': 1, '[N-]=[N+]=[N]c1cccc(-c2cccc(C(=O)O)c2)c1O': 1, '[N-]=[N+]=[N]c1cc(-c2cccc(SC(F)(F)F)c2)n[nH]1': 1, '[N-]=[N+]=[N]c1ccc(O)c(-c2nc3ccccc3s2)c1': 1, '[N-]=[N+]=[N]CCC(=O)N1Cc2ccccc2C#Cc2ccccc21': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)N2CCCc3ccccc32)cc1': 1, '[N-]=[N+]=[N]c1ccccc1C#Cc1ccc(F)cc1': 1, 'CNc1cc2[nH]c(=O)n(-c3ccc([N]=[N+]=[N-])cc3)c(=O)c2cc1F': 1, 'N#Cc1c[nH]c2ccc([N]=[N+]=[N-])cc12': 1, '[N-]=[N+]=[N]c1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 1, '[N-]=[N+]=[N]c1ccc2c(c1)CN(C1CCC(=O)NC1=O)C2=O': 1, '[N-]=[N+]=[N]c1cc(F)c2cnccc2c1': 1, '[N-]=[N+]=[N][C@@H](Cc1ccc(C(=O)c2ccccc2)cc1)C(=O)O': 1, '[N-]=[N+]=[N]C1CCOc2ccccc2C1': 1, '[N-]=[N+]=[N][C@H]1c2cc(Br)ccc2C[C@H]1O': 1, 'C[C@H]([N]=[N+]=[N-])c1ccc(C(=O)Nc2ccnc3[nH]ccc23)cc1': 1, '[N-]=[N+]=[N]c1ccc(-c2cccc(F)c2)cc1': 1, '[N-]=[N+]=[N]c1cc(-c2ccc(C(F)(F)F)cc2)n[nH]1': 1, '[N-]=[N+]=[N]c1c2c(cc3c1C(C(F)F)CC3)CCC2': 1, 'CNc1ccc2nc3ccc([N]=[N+]=[N-])cc3[s+]c2c1': 1, '[N-]=[N+]=[N]c1ccc2[nH]c(=O)c(=O)[nH]c2c1': 1, '[N-]=[N+]=[N]c1ccc2c3c(cccc13)C(=O)OC2=O': 1, '[N-]=[N+]=[N]c1cccc(C(O)(C(F)(F)F)C(F)(F)F)c1': 1, 'CC1CCc2cc3c(c([N]=[N+]=[N-])c21)CCC3': 1, '[N-]=[N+]=[N]c1ccc(C(F)(F)F)c2cccnc12': 1, '[N-]=[N+]=[N]c1cc(F)cc2c1NC(=O)C2': 1})

Counter({'[N-]=[N+]=[N][C@H](CO)c1ccc2cc3ccccc3cc2c1': 8, '[N-]=[N+]=[N]c1cccc2c1[nH]c1ccccc12': 6, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 5, '[N-]=[N+]=[N]C12CCCC(NC(=O)OCc3ccccc3)(CC1)C2': 5, '[N-]=[N+]=[N]c1ccc2cc(-c3nc4ccccc4[nH]3)c(=O)oc2c1': 5, '[N-]=[N+]=[N]Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1ccc2c(=O)[nH][nH]c(=O)c2c1': 3, '[N-]=[N+]=[N]c1ccc(N2CCN(C(=O)c3ccccc3F)CC2)cc1': 3, '[N-]=[N+]=[N]c1ccc(CNc2ccccc2)cc1': 2, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)[N-]c2cnc3ccccc3n2)cc1': 2, '[N-]=[N+]=[N]c1ccc2cc(-c3ccccc3)c(=O)oc2c1': 2, '[N-]=[N+]=[N]c1ccc2c(CC(=O)O)cc(=O)oc2c1': 2, '[N-]=[N+]=[N]c1ccc(C(=O)Nc2ccccc2Cl)cc1': 2, '[N-]=[N+]=[N]c1ccn(Cc2ccc(Cl)c(Cl)c2)n1': 2, '[N-]=[N+]=[N]c1n[nH]c2ccc([N+](=O)[O-])cc12': 2, '[N-]=[N+]=[N]c1ccc(-n2c(=O)[nH]c3cc(F)c(F)cc3c2=O)cc1': 2, '[N-]=[N+]=[N]c1ccc(N2CCN(c3ccc(O)cc3)CC2)cc1': 2, 'Cc1ccc2nc(-c3ccc([N]=[N+]=[N-])cc3O)oc2c1': 2, '[N-]=[N+]=[N][C@H]1CC[C@H](NC2CC2c2ccccc2)CC1': 1, '[N-]=[N+]=[N]c1n[nH]c2ccc(Cc3cc(F)cc(F)c

In [15]:
docking_results_values_ace = {}
for i in receptor_pdb_list:
    docking_results_values_ace[i] = []
    for j in range(0,len(docking_results_ace[i]),50):  
        sum_result = builtins.sum(float(k) for k in docking_results_ace[i][j:j+50])
        docking_results_values_ace[i].append([int(all_ace_keys_list[j].split('_')[1]),sum_result])

In [16]:
all_top10_ace_list = []
for i in docking_results_values_ace:
    sorted_data_ace = sorted(docking_results_values_ace[i], key=lambda x: x[1])
    top_ten_indices_ace = [index for index, value in sorted_data_ace[:10]]
    for j in top_ten_indices_ace:
        all_top10_ace_list.append(acytylene_list[j])

In [17]:
element_counts_all_top10_ace = Counter(all_top10_ace_list)
print(element_counts_all_top10_ace)
# Counter({'C#Cc1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 9, 'C#Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 9, 'C#Cc1ccc(-c2ccc(O)cc2)cc1': 9, 'C#Cc1cnc(N)c(-c2ccccc2)n1': 7, 'C#Cc1ccc(C#Cc2ccccc2)cc1': 7, 'C#Cc1ccc(C(=O)N2CCOCC2)cc1': 6, 'C#Cc1cccc2c1OC(F)(F)O2': 5, 'C#Cc1ccc2ccccc2c1': 5, 'C#Cc1cccc2ccccc12': 4, 'C#Cc1ccc(-c2ccccn2)nc1': 4, 'C#CCN[C@@H]1CCc2ccccc21': 3, 'C#CC1(O)CN(S(=O)(=O)c2ccc(C)cc2)C1': 3, 'C#CCCNC(=O)c1ccc(N)c(N)c1': 3, 'C#CCn1c2ccccc2c2ccccc21': 2, 'C#CCN[C@@H]1CCc2ccccc21.CS(=O)(=O)O': 2, 'C#CCON1C(=O)c2ccccc2C1=O': 2, 'C#Cc1ccc(N2CCOCC2)cc1': 2, 'C#CCNC(=O)Cc1ccc(O)c(O)c1': 2, 'C#CCCN1C(=O)c2ccccc2C1=O': 1, 'C#Cc1cccc(OC(F)(F)F)c1': 1, 'C#Cc1ccc2[nH]nc(C)c2c1': 1, 'C#Cc1ccc2[nH]ncc2n1': 1, 'C#Cc1ccc(OC(F)(F)F)cc1': 1, 'C#Cc1nccc2ccccc12': 1}

Counter({'C#Cc1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 9, 'C#Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 9, 'C#Cc1ccc(-c2ccc(O)cc2)cc1': 9, 'C#Cc1cnc(N)c(-c2ccccc2)n1': 7, 'C#Cc1ccc(C#Cc2ccccc2)cc1': 7, 'C#Cc1ccc(C(=O)N2CCOCC2)cc1': 6, 'C#Cc1cccc2c1OC(F)(F)O2': 5, 'C#Cc1ccc2ccccc2c1': 5, 'C#Cc1cccc2ccccc12': 4, 'C#Cc1ccc(-c2ccccn2)nc1': 4, 'C#CCN[C@@H]1CCc2ccccc21': 3, 'C#CC1(O)CN(S(=O)(=O)c2ccc(C)cc2)C1': 3, 'C#CCCNC(=O)c1ccc(N)c(N)c1': 3, 'C#CCn1c2ccccc2c2ccccc21': 2, 'C#CCN[C@@H]1CCc2ccccc21.CS(=O)(=O)O': 2, 'C#CCON1C(=O)c2ccccc2C1=O': 2, 'C#Cc1ccc(N2CCOCC2)cc1': 2, 'C#CCNC(=O)Cc1ccc(O)c(O)c1': 2, 'C#CCCN1C(=O)c2ccccc2C1=O': 1, 'C#Cc1cccc(OC(F)(F)F)c1': 1, 'C#Cc1ccc2[nH]nc(C)c2c1': 1, 'C#Cc1ccc2[nH]ncc2n1': 1, 'C#Cc1ccc(OC(F)(F)F)cc1': 1, 'C#Cc1nccc2ccccc12': 1})


In [18]:
all_results_ace_and_NH2 = top45_acylyene_list + all_top10_ace_list + all_top10_NH2_list + top45_NH2_list
print(Counter(all_results_ace_and_NH2))
# Counter({'C#Cc1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 37, 'C#Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 28, 'C#Cc1cnc(N)c(-c2ccccc2)n1': 15, '[N-]=[N+]=[N]c1cccc2c1[nH]c1ccccc12': 12, 'C#Cc1ccc(-c2ccc(O)cc2)cc1': 11, 'C#Cc1ccc2ccccc2c1': 10, 'C#Cc1ccc(C#Cc2ccccc2)cc1': 9, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 8, '[N-]=[N+]=[N][C@H](CO)c1ccc2cc3ccccc3cc2c1': 8, 'C#Cc1ccc(C(=O)N2CCOCC2)cc1': 6, 'C#CCN[C@@H]1CCc2ccccc21': 5, 'C#CCCNC(=O)c1ccc(N)c(N)c1': 5, 'C#Cc1cccc2ccccc12': 5, 'C#Cc1cccc2c1OC(F)(F)O2': 5, '[N-]=[N+]=[N]C12CCCC(NC(=O)OCc3ccccc3)(CC1)C2': 5, '[N-]=[N+]=[N]c1ccc2cc(-c3ccccc3)c(=O)oc2c1': 5, '[N-]=[N+]=[N]c1ccc2cc(-c3nc4ccccc4[nH]3)c(=O)oc2c1': 5, '[N-]=[N+]=[N][C@@H]1CCc2[nH]c3ccccc3c2C1': 5, 'C#CCON1C(=O)c2ccccc2C1=O': 4, 'C#Cc1ccc(-c2ccccn2)nc1': 4, '[N-]=[N+]=[N]Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1ccc(N2CCN(C(=O)c3ccccc3F)CC2)cc1': 4, '[N-]=[N+]=[N]c1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1cc(F)c2ncccc2c1': 4, '[N-]=[N+]=[N]c1ccc2c(c1)NC(=O)CC2': 4, 'C#CCn1c2ccccc2c2ccccc21': 3, 'C#CCN[C@@H]1CCc2ccccc21.CS(=O)(=O)O': 3, 'C#CC1(O)CN(S(=O)(=O)c2ccc(C)cc2)C1': 3, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)[N-]c2cnc3ccccc3n2)cc1': 3, '[N-]=[N+]=[N]c1ccc2c(=O)[nH][nH]c(=O)c2c1': 3, '[N-]=[N+]=[N]CCC(=O)N1Cc2ccccc2C#Cc2ccccc21': 3, '[N-]=[N+]=[N]c1ccc(-n2c(=O)[nH]c3cc(F)c(F)cc3c2=O)cc1': 3, 'Cc1ccc2nc(-c3ccc([N]=[N+]=[N-])cc3O)oc2c1': 3, '[N-]=[N+]=[N]c1nnc(Cc2ccc(F)cc2)o1': 3, '[N-]=[N+]=[N][C@@H]1CCc2ccc(F)cc21': 3, '[N-]=[N+]=[N]c1ccc2cccc(O)c2c1': 3, '[N-]=[N+]=[N]c1cc2ccccc2cc1O': 3, 'C#Cc1ccc2[nH]nc(C)c2c1': 2, 'C#Cc1cccc2nccn12': 2, 'C#Cc1ccccc1C(F)(F)F': 2, 'C#Cc1ccc(N2CCOCC2)cc1': 2, 'C#CCNC(=O)Cc1ccc(O)c(O)c1': 2, '[N-]=[N+]=[N]c1n[nH]c2ccc(Cc3cc(F)cc(F)c3)cc12': 2, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NC(=O)c2ccccc2)cc1': 2, '[N-]=[N+]=[N]C1CCN(C(=O)OCc2ccccc2)C1': 2, '[N-]=[N+]=[N]c1ccc(CNc2ccccc2)cc1': 2, '[N-]=[N+]=[N]c1nc(-c2cccc(C(F)(F)F)c2)n[nH]1': 2, '[N-]=[N+]=[N]c1ccc2c(CC(=O)O)cc(=O)oc2c1': 2, '[N-]=[N+]=[N]c1ccc(C(=O)Nc2ccccc2Cl)cc1': 2, '[N-]=[N+]=[N]c1ccn(Cc2ccc(Cl)c(Cl)c2)n1': 2, '[N-]=[N+]=[N]c1n[nH]c2ccc([N+](=O)[O-])cc12': 2, 'CNc1cc2[nH]c(=O)n(-c3ccc([N]=[N+]=[N-])cc3)c(=O)c2cc1F': 2, 'N#Cc1c[nH]c2ccc([N]=[N+]=[N-])cc12': 2, '[N-]=[N+]=[N]c1ccc(N2CCN(c3ccc(O)cc3)CC2)cc1': 2, '[N-]=[N+]=[N]c1ccc2[nH]c(=O)c(=O)[nH]c2c1': 2, '[N-]=[N+]=[N]c1ccc2c3c(cccc13)C(=O)OC2=O': 2, '[N-]=[N+]=[N]CCC(O)c1ccccc1': 2, '[N-]=[N+]=[N]C1CC2(CNC(=O)C2)C1': 2, '[N-]=[N+]=[N]CCC1CCc2ccc3c(c21)CCO3': 2, '[N-]=[N+]=[N][C@H]1CCc2ccc(F)cc21': 2, 'C#Cc1cccc(C(=O)O)c1': 1, 'C#Cc1cnc2[nH]ncc2c1': 1, 'C#Cc1cc(F)c(F)cc1F': 1, 'C#Cc1cccc2cn[nH]c12': 1, 'C#Cc1cccc(=O)[nH]1.O=C(O)C(F)(F)F': 1, 'C#Cc1cccc(C(N)=O)c1': 1, 'C#CCN1C(=O)c2ccccc2C1=O': 1, 'C#CCOc1ccccc1C=O': 1, 'C#CC(=O)c1ccccc1Cl': 1, 'C#CC(N)Cc1cccc(F)c1.Cl': 1, 'C#CCOC(=O)c1ccccc1': 1, 'C#Cc1c[nH]c2ccc([N+](=O)[O-])cc12': 1, 'C#CCCN1C(=O)c2ccccc2C1=O': 1, 'C#Cc1cccc(OC(F)(F)F)c1': 1, 'C#Cc1ccc2[nH]ncc2n1': 1, 'C#Cc1ccc(OC(F)(F)F)cc1': 1, 'C#Cc1nccc2ccccc12': 1, '[N-]=[N+]=[N][C@H]1CC[C@H](NC2CC2c2ccccc2)CC1': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NCc2ccccc2)cc1': 1, '[N-]=[N+]=[N]c1cccc(-c2cccc(C(=O)O)c2)c1O': 1, '[N-]=[N+]=[N]c1cc(-c2cccc(SC(F)(F)F)c2)n[nH]1': 1, '[N-]=[N+]=[N]c1ccc(O)c(-c2nc3ccccc3s2)c1': 1, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)N2CCCc3ccccc32)cc1': 1, '[N-]=[N+]=[N]c1ccccc1C#Cc1ccc(F)cc1': 1, '[N-]=[N+]=[N]c1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 1, '[N-]=[N+]=[N]c1ccc2c(c1)CN(C1CCC(=O)NC1=O)C2=O': 1, '[N-]=[N+]=[N]c1cc(F)c2cnccc2c1': 1, '[N-]=[N+]=[N][C@@H](Cc1ccc(C(=O)c2ccccc2)cc1)C(=O)O': 1, '[N-]=[N+]=[N]C1CCOc2ccccc2C1': 1, '[N-]=[N+]=[N][C@H]1c2cc(Br)ccc2C[C@H]1O': 1, 'C[C@H]([N]=[N+]=[N-])c1ccc(C(=O)Nc2ccnc3[nH]ccc23)cc1': 1, '[N-]=[N+]=[N]c1ccc(-c2cccc(F)c2)cc1': 1, '[N-]=[N+]=[N]c1cc(-c2ccc(C(F)(F)F)cc2)n[nH]1': 1, '[N-]=[N+]=[N]c1c2c(cc3c1C(C(F)F)CC3)CCC2': 1, 'CNc1ccc2nc3ccc([N]=[N+]=[N-])cc3[s+]c2c1': 1, '[N-]=[N+]=[N]c1cccc(C(O)(C(F)(F)F)C(F)(F)F)c1': 1, 'CC1CCc2cc3c(c([N]=[N+]=[N-])c21)CCC3': 1, '[N-]=[N+]=[N]c1ccc(C(F)(F)F)c2cccnc12': 1, '[N-]=[N+]=[N]c1cc(F)cc2c1NC(=O)C2': 1, '[N-]=[N+]=[N]c1nc2c(N3CCOCC3)cccc2s1': 1, '[N-]=[N+]=[N]CCc1c[nH]c2cc(F)ccc12': 1, '[N-]=[N+]=[N]c1nc2ccc(OC(F)(F)F)cc2s1': 1, '[N-]=[N+]=[N]c1nc2cc(C(F)(F)F)ccn2n1': 1, '[N-]=[N+]=[N]C1CCCc2cc(O)ccc21': 1, '[N-]=[N+]=[N]c1ccc2c(c1)CCC(=O)N2': 1, 'CC(=O)Nc1ccc(Cc2ccc([N]=[N+]=[N-])cc2)cc1': 1, '[N-]=[N+]=[N]c1ccccc1Cl': 1, '[N-]=[N+]=[N]c1cccc2c(Cl)cccc12': 1, 'Cc1ccc2[nH]c([N]=[N+]=[N-])nc2c1': 1, '[N-]=[N+]=[N]CCNC(=O)Cc1cccc2ccccc12': 1, '[N-]=[N+]=[N]c1ccc(CCc2ccccc2)cc1': 1, '[N-]=[N+]=[N]c1nc(-c2ccc(C(F)(F)F)cc2)cs1': 1, '[N-]=[N+]=[N]c1c[nH]c2ncnc(Cl)c12': 1, '[N-]=[N+]=[N]CC1CN(C(c2ccccc2)c2ccccc2)C1': 1, '[N-]=[N+]=[N]c1n[nH]c2cc(C(F)(F)F)ccc12': 1, '[N-]=[N+]=[N]C(Cc1ccc(-c2ccccc2)cc1)C(=O)O': 1, '[N-]=[N+]=[N]c1ccc2cc(C(=O)O)cnc2c1': 1, '[N-]=[N+]=[N]c1ccn(Cc2ccccc2)n1': 1, 'Cc1nc(C[N]=[N+]=[N-])nc2c1CCCC2': 1, '[N-]=[N+]=[N]c1cccc2c1C=CCO2': 1, 'Cc1ccc(-c2cccc([N]=[N+]=[N-])c2)cc1': 1, '[N-]=[N+]=[N]c1ccn(-c2c(F)cccc2F)n1': 1, '[N-]=[N+]=[N]c1nnc(-c2ccccc2)o1': 1, '[N-]=[N+]=[N]c1c(F)ccc2cnccc12': 1, '[N-]=[N+]=[N]c1cccc2cc(C(=O)O)oc12': 1, 'Cc1cc([N]=[N+]=[N-])n(-c2ccccc2)n1': 1})

Counter({'C#Cc1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 37, 'C#Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 28, 'C#Cc1cnc(N)c(-c2ccccc2)n1': 15, '[N-]=[N+]=[N]c1cccc2c1[nH]c1ccccc12': 12, 'C#Cc1ccc(-c2ccc(O)cc2)cc1': 11, 'C#Cc1ccc2ccccc2c1': 10, 'C#Cc1ccc(C#Cc2ccccc2)cc1': 9, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 8, '[N-]=[N+]=[N][C@H](CO)c1ccc2cc3ccccc3cc2c1': 8, 'C#Cc1ccc(C(=O)N2CCOCC2)cc1': 6, 'C#CCN[C@@H]1CCc2ccccc21': 5, 'C#CCCNC(=O)c1ccc(N)c(N)c1': 5, 'C#Cc1cccc2ccccc12': 5, 'C#Cc1cccc2c1OC(F)(F)O2': 5, '[N-]=[N+]=[N]C12CCCC(NC(=O)OCc3ccccc3)(CC1)C2': 5, '[N-]=[N+]=[N]c1ccc2cc(-c3ccccc3)c(=O)oc2c1': 5, '[N-]=[N+]=[N]c1ccc2cc(-c3nc4ccccc4[nH]3)c(=O)oc2c1': 5, '[N-]=[N+]=[N][C@@H]1CCc2[nH]c3ccccc3c2C1': 5, 'C#CCON1C(=O)c2ccccc2C1=O': 4, 'C#Cc1ccc(-c2ccccn2)nc1': 4, '[N-]=[N+]=[N]Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1ccc(N2CCN(C(=O)c3ccccc3F)CC2)cc1': 4, '[N-]=[N+]=[N]c1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 4, '[N-]=[N+]=[N]c1cc(F)c2ncccc2c1': 4, '[N-]=[N+]=[N]c1ccc2c(c1)NC(=O

In [19]:
elements_with_count_greater_than_1 = {key: value for key, value in Counter(all_results_ace_and_NH2).items() if value > 1}
print(elements_with_count_greater_than_1)

{'C#Cc1cnc(N)c(-c2ccccc2)n1': 15, 'C#Cc1ccc(-c2ccc(O)cc2)cc1': 11, 'C#CCn1c2ccccc2c2ccccc21': 3, 'C#Cc1cccc2c1CN(C1CCC(=O)NC1=O)C2=O': 28, 'C#Cc1ccc2ccccc2c1': 10, 'C#CCON1C(=O)c2ccccc2C1=O': 4, 'C#CCN[C@@H]1CCc2ccccc21': 5, 'C#Cc1ccc(C#Cc2ccccc2)cc1': 9, 'C#Cc1ccc2c(c1)C(=O)N(C1CCC(=O)NC1=O)C2=O': 37, 'C#Cc1ccc2[nH]nc(C)c2c1': 2, 'C#Cc1cccc2nccn12': 2, 'C#CCCNC(=O)c1ccc(N)c(N)c1': 5, 'C#Cc1ccccc1C(F)(F)F': 2, 'C#CCN[C@@H]1CCc2ccccc21.CS(=O)(=O)O': 3, 'C#Cc1cccc2ccccc12': 5, 'C#Cc1cccc2c1OC(F)(F)O2': 5, 'C#Cc1ccc(-c2ccccn2)nc1': 4, 'C#Cc1ccc(C(=O)N2CCOCC2)cc1': 6, 'C#Cc1ccc(N2CCOCC2)cc1': 2, 'C#CC1(O)CN(S(=O)(=O)c2ccc(C)cc2)C1': 3, 'C#CCNC(=O)Cc1ccc(O)c(O)c1': 2, '[N-]=[N+]=[N]c1n[nH]c2ccc(Cc3cc(F)cc(F)c3)cc12': 2, '[N-]=[N+]=[N][C@@H](CO)c1cc2ccccc2c2ccccc12': 8, '[N-]=[N+]=[N][C@H](CO)c1ccc2cc3ccccc3cc2c1': 8, '[N-]=[N+]=[N]c1ccc(S(=O)(=O)NC(=O)c2ccccc2)cc1': 2, '[N-]=[N+]=[N]C12CCCC(NC(=O)OCc3ccccc3)(CC1)C2': 5, '[N-]=[N+]=[N]C1CCN(C(=O)OCc2ccccc2)C1': 2, '[N-]=[N+]=[N]Cc1cccc2c1CN(

In [20]:
elements_with_count_greater_than_1_N3 = []
elements_with_count_greater_than_1_ace = []
for i in elements_with_count_greater_than_1:
    if i in N3_list:
        elements_with_count_greater_than_1_N3.append(i)
    elif i in acytylene_list:
        elements_with_count_greater_than_1_ace.append(i)
    else:
        print(i)
print(len(elements_with_count_greater_than_1_N3),len(elements_with_count_greater_than_1_ace),len(elements_with_count_greater_than_1))

39 21 60


In [21]:
with open ('/home/test/BiDe/BiDe_NH2.csv','r') as f:
    lines = f.readlines()
    amine_smiles = [l.split(',') for l in lines]
with open ('/home/test/BiDe/BiDe_term_acetylene.csv','r') as f:
    lines = f.readlines()
    ace_smiles = [l.split(',') for l in lines]

In [22]:
amine_smiles = amine_smiles[1:]
ace_smiles = ace_smiles[1:]

In [23]:
a = []
cas_list_NH2 = []
cas_list_ace = []
for i in range(len(amine_smiles)):
    ligand_smiles = amine_smiles[i][-10].split('.')
    for j in range(len(ligand_smiles)):
        convert_result = convert_primary_amines_to_diazonium(ligand_smiles[j])
        if convert_result in elements_with_count_greater_than_1:
            if ligand_smiles[j] not in a:
                a.append(convert_result)
                cas_list_NH2.append([amine_smiles[i][0],amine_smiles[i][-9]])
for i in range(len(ace_smiles)):
    ligand_smiles = ace_smiles[i][-10]
    if ligand_smiles in elements_with_count_greater_than_1:
        if ligand_smiles not in a:
            a.append(ligand_smiles)
            cas_list_ace.append([ace_smiles[i][0],amine_smiles[i][-9]])
               
print(cas_list_NH2)
print(cas_list_ace)
# ['41339-17-7', '116650-33-0', '175230-02-1', '758699-84-2', '4108-61-6', '895929-56-3', '1383973-53-2', '3682-14-2', '313645-14-6', '127-71-9', '1108745-30-7', '1255942-06-3', '1082649-42-0', '2172063-36-2', '31911-20-3', '1213581-06-6', '24007-66-7', '502686-01-3', '936500-99-1', '444289-05-8', '1212946-34-3', '2829282-04-2', '85157-21-7', '74853-08-0', '888-79-9', '731859-02-2', '1391354-92-9', '1159822-27-1', '159768-57-7', '22246-07-7', '5417-63-0', '191732-72-6', '6492-86-0', '5053-63-4', '1053239-39-6', '6973-93-9', '18992-86-4', '114843-08-2', '967-80-6', '936501-00-7', '828911-26-8', '136236-51-6', '1349199-60-5', '704-41-6', '4282-77-3', '15727-65-8', '2227272-78-6', '92866-00-7', '41876-72-6', '1332585-62-2', '2154356-63-3', '1416990-26-5', '1093307-29-9', 'BD01624241', '161735-79-1', '1408074-62-3', '1374115-61-3', '2949-26-0', '4616-63-1', '162318-34-5', '851895-20-0', '1202355-37-0']

# ['41339-17-7', '116650-33-0', '175230-02-1', '758699-84-2', '4108-61-6', '895929-56-3', '1383973-53-2', '3682-14-2', '313645-14-6', '127-71-9', '1108745-30-7', '1255942-06-3', '1082649-42-0', '2172063-36-2', '31911-20-3', '1213581-06-6', '24007-66-7', '502686-01-3', '936500-99-1', '444289-05-8', '1212946-34-3', '2829282-04-2', '85157-21-7', '74853-08-0', '888-79-9', '731859-02-2', '1391354-92-9', '1159822-27-1', '159768-57-7', '22246-07-7', '5417-63-0', '191732-72-6', '6492-86-0', '5053-63-4', '1053239-39-6', '6973-93-9', '18992-86-4', '114843-08-2', '967-80-6', '936501-00-7', '828911-26-8']
# ['136236-51-6', '1349199-60-5', '704-41-6', '4282-77-3', '15727-65-8', '2227272-78-6', '92866-00-7', '41876-72-6', '1332585-62-2', '2154356-63-3', '1416990-26-5', '1093307-29-9', 'BD01624241', '161735-79-1', '1408074-62-3', '1374115-61-3', '2949-26-0', '4616-63-1', '162318-34-5', '851895-20-0', '1202355-37-0']

[['41339-17-7', 'BD4695'], ['116650-33-0', 'BD47512'], ['175230-02-1', 'BD482896'], ['758699-84-2', 'BD496790'], ['4108-61-6', 'BD562129'], ['895929-56-3', 'BD569522'], ['1383973-53-2', 'BD570366'], ['3682-14-2', 'BD6187'], ['313645-14-6', 'BD624665'], ['127-71-9', 'BD626285'], ['1108745-30-7', 'BD683408'], ['1255942-06-3', 'BD763165'], ['1082649-42-0', 'BD767020'], ['2172063-36-2', 'BD00816020'], ['31911-20-3', 'BD00824079'], ['1213581-06-6', 'BD00849467'], ['24007-66-7', 'BD01084282'], ['502686-01-3', 'BD01117794'], ['936500-99-1', 'BD01186666'], ['444289-05-8', 'BD01281411'], ['1212946-34-3', 'BD01388199'], ['2829282-04-2', 'BD01400768'], ['85157-21-7', 'BD160257'], ['74853-08-0', 'BD163967'], ['888-79-9', 'BD173217'], ['731859-02-2', 'BD209572'], ['1391354-92-9', 'BD209813'], ['1159822-27-1', 'BD214718'], ['159768-57-7', 'BD102237'], ['22246-07-7', 'BD110211'], ['5417-63-0', 'BD12850'], ['191732-72-6', 'BD136128'], ['6492-86-0', 'BD14369'], ['5053-63-4', 'BD28881'], ['1053239-39-6'

In [24]:
N3_list_lower_65 = []
for i in range(len(all_N3_keys_list)):
    for j in receptor_pdb_list:
        if float(docking_results_NH2[j][i])<-6.5:
            if N3_list[int(all_N3_keys_list[i].split('_')[0])] not in N3_list_lower_65:
                N3_list_lower_65.append(N3_list[int(all_N3_keys_list[i].split('_')[0])])
for i in range(len(all_ace_keys_list)):
    for j in receptor_pdb_list:
        if float(docking_results_ace[j][i])<-6.5:
            if N3_list[int(all_ace_keys_list[i].split('_')[0])] not in N3_list_lower_65:
                N3_list_lower_65.append(N3_list[int(all_ace_keys_list[i].split('_')[0])])

In [27]:
ace_list_lower_65 = []
for i in range(len(all_ace_keys_list)):
    for j in receptor_pdb_list:
        if float(docking_results_ace[j][i])<-6.5:
            if acytylene_list[int(all_ace_keys_list[i].split('_')[1])] not in ace_list_lower_65:
                ace_list_lower_65.append(acytylene_list[int(all_ace_keys_list[i].split('_')[1])])
for i in range(len(all_N3_keys_list)):
    for j in receptor_pdb_list:
        if float(docking_results_NH2[j][i])<-6.5:
            if acytylene_list[int(all_N3_keys_list[i].split('_')[1])] not in ace_list_lower_65:
                ace_list_lower_65.append(acytylene_list[int(all_N3_keys_list[i].split('_')[1])])

In [28]:
len(ace_list_lower_65)
# save_list_to_csv( '/home/test/BiDe/N3_list_lower_65.csv',N3_list_lower_65)
# save_list_to_csv( '/home/test/BiDe/ace_list_lower_65.csv',ace_list_lower_65)
# save_list_to_csv( '/home/test/BiDe/cas_list_NH2.csv',cas_list_NH2)
# save_list_to_csv( '/home/test/BiDe/cas_list_ace.csv',cas_list_ace)

In [2]:
print('1')

1
