In [1]:
# IMPORTS-- run first
import pandas as pd
import os
import subprocess

<h1>1. Creating Individual Small Molecule Data Files</h1>
This section uses pandas to write one file per molecule from the True Actives, True Inactives, LUDe Decoys, and DeepCoy Decoys data sets. Each file is then converted with Open Babel (`obabel`) into a 3D structure format (.mol2 and .sdf, in this case).

<h3>1.1 Creating individual SMILES files with unique names</h3>

In [None]:
# Load the True Actives table; the following cell reads its columns by position.
actives_csv = 'True_Actives/Actives_smiles.csv'
A_df = pd.read_csv(actives_csv)

In [None]:
## Split the actives table into one .smi file per row, named after the row's id
## (the LUDe query number, per the original note on this cell).

A_id_col = A_df.columns[1]   # second CSV column -> file name
A_smi_col = A_df.columns[2]  # third CSV column  -> file contents (SMILES)

A_output_folder = "True_Actives/Smiles"
os.makedirs(A_output_folder, exist_ok = True)

for _, record in A_df.iterrows():
    mol_id, smiles_text = record[A_id_col], record[A_smi_col]

    # <output folder>/<id>.smi holds only the SMILES string (no trailing newline).
    smi_path = os.path.join(A_output_folder, f"{mol_id}.smi")
    with open(smi_path, "w") as smi_handle:
        smi_handle.write(smiles_text)

In [None]:
## Making individual .smi files from the True Inactives csv, one file per row

I_df = pd.read_csv('True_Inactives/I_smiles.csv')

# FIX: I_id_col / I_smi_col are read in the loop below but are not assigned
# anywhere in the visible notebook, so this cell raised NameError on a fresh kernel.
# NOTE(review): the column positions mirror the True Actives cell
# (id = 2nd column, SMILES = 3rd column) -- confirm against the header of
# I_smiles.csv before trusting the generated files.
I_id_col = I_df.columns[1]
I_smi_col = I_df.columns[2]

I_output_folder = "True_Inactives/Smiles"
os.makedirs(I_output_folder, exist_ok = True)

for index, row in I_df.iterrows():
    unique_id = row[I_id_col]  # Get the unique id used as the file name
    smiles_code = row[I_smi_col]  # Get the SMILES string

    # Define file path: <output folder>/<id>.smi
    file_path = os.path.join(I_output_folder, f"{unique_id}.smi")

    # Write SMILES to the file (SMILES text only, no trailing newline)
    with open(file_path, "w") as file:
        file.write(smiles_code)

In [None]:
# Load the LUDe decoys table and write one .smi file per decoy.
ludecoys_csv = 'LUDe_Decoys/LD_smiles.csv'
LD_df = pd.read_csv(ludecoys_csv)

# Columns are addressed by position in the CSV.
LD_id_col = LD_df.columns[0]     # first column  -> decoy id
LD_query_col = LD_df.columns[2]  # third column  -> query id
LD_smi_col = LD_df.columns[1]    # second column -> SMILES string

LD_output_folder = "LUDe_Decoys/Smiles"
os.makedirs(LD_output_folder, exist_ok = True)

for _, record in LD_df.iterrows():
    decoy_id, parent_query, smiles_text = (
        record[LD_id_col],
        record[LD_query_col],
        record[LD_smi_col],
    )

    # File name is "<query id>_<decoy id>.smi"; the file holds only the SMILES string.
    smi_path = os.path.join(LD_output_folder, f"{parent_query}_{decoy_id}.smi")
    with open(smi_path, "w") as smi_handle:
        smi_handle.write(smiles_text)

<h3>1.2 Converting each SMILES file into a .mol2 File</h3>

In [None]:
## Converting .smi files to .mol2 files put in a separate folder

A_smiles_folder = "True_Actives/Smiles"
A_mol2_output_folder = "True_Actives/Mol2"
os.makedirs(A_mol2_output_folder, exist_ok = True)

for smiles_file in os.listdir(A_smiles_folder):
    if not smiles_file.endswith(".smi"):
        continue  # ignore anything that is not a SMILES file

    file_path = os.path.join(A_smiles_folder, smiles_file)

    # File name without the .smi extension; reused as the .mol2 file name
    base_name = os.path.splitext(smiles_file)[0]
    mol2_file = os.path.join(A_mol2_output_folder, f"{base_name}.mol2")

    # Pass the command as an argument list and skip the shell: the file names
    # come from ids in the CSV, so spaces or shell metacharacters in an id can
    # no longer split or inject into the command line.
    # obabel flags: -O output file, --gen3d generate 3D coordinates, -h add hydrogens.
    command = ["obabel", file_path, "-O", mol2_file, "--gen3d", "-h"]

    try:
        subprocess.run(command, check = True)
        print(f"Successfully converted {file_path} to {mol2_file}")
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: obabel exited non-zero.
        # OSError: obabel could not be launched (e.g. not on PATH); reported
        # per file and the loop continues, as the shell-based version did.
        print(f"Error processing {file_path}: {e}")
            

In [None]:
## Converting LUDe decoy .smi files to .mol2 files put in a separate folder
## (files that already have a .mol2 counterpart are skipped, so the cell can be re-run)

LD_smiles_folder = "LUDe_Decoys/Smiles"
LD_mol2_output_folder = "LUDe_Decoys/Mol2"
os.makedirs(LD_mol2_output_folder, exist_ok = True)

for smiles_file in os.listdir(LD_smiles_folder):
    if not smiles_file.endswith(".smi"):
        continue  # ignore anything that is not a SMILES file

    file_path = os.path.join(LD_smiles_folder, smiles_file)

    # File name without the .smi extension; reused as the .mol2 file name
    base_name = os.path.splitext(smiles_file)[0]
    mol2_file = os.path.join(LD_mol2_output_folder, f"{base_name}.mol2")

    # Check if the .mol2 file already exists
    if os.path.exists(mol2_file):
        print(f"Skipping {file_path}, {mol2_file} already exists.")
        continue  # Skip this file

    # Pass the command as an argument list and skip the shell: the file names
    # come from ids in the CSV, so spaces or shell metacharacters in an id can
    # no longer split or inject into the command line.
    # obabel flags: -O output file, --gen3d generate 3D coordinates, -h add hydrogens.
    command = ["obabel", file_path, "-O", mol2_file, "--gen3d", "-h"]

    try:
        subprocess.run(command, check = True)
        print(f"Successfully converted {file_path} to {mol2_file}")
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: obabel exited non-zero.
        # OSError: obabel could not be launched (e.g. not on PATH); reported
        # per file and the loop continues, as the shell-based version did.
        print(f"Error processing {file_path}: {e}")

<h1>2. Docking with Smina</h1>
In this step, all small molecules will be docked into the H-alpha region of the AlphaFold structure of monomeric MYC protein. Coordinates for this region were derived from Chimera; the ~40 residues in the literature-described region (residues 370-409) on MYC were selected, and the coordinates of the centroid were calculated using available commands in Chimera.

In [5]:
# Dock every True Actives .mol2 ligand against the MYC receptor with smina.
# Each ligand produces a docked pose file and a text file capturing smina's
# console output (stdout and stderr). Ligands whose docked file already exists
# are skipped, so the cell can be re-run after an interruption.

A_mol2_folder = "True_Actives/Mol2"
MYC = "PreppedAlphaFoldMYCmonomer.mol2"
A_docked_folder = "True_Actives/Docked"
os.makedirs(A_docked_folder, exist_ok = True)
A_scores_folder = "True_Actives/Docked/Scores"
os.makedirs(A_scores_folder, exist_ok = True)

# Search box: centre is the centroid described in the markdown above this cell.
# Values are kept as strings so exactly these digits reach smina.
smina_box_args = [
    "--center_x", "-5.20920191",
    "--center_y", "7.67740661",
    "--center_z", "-5.84617042",
    "--size_x", "30",
    "--size_y", "30",
    "--size_z", "30",
]
# One pose per ligand, minimal search effort, fixed seed.
smina_search_args = ["--num_modes", "1", "--exhaustiveness", "1", "--seed", "0"]

for mol2_files in os.listdir(A_mol2_folder):
    if not mol2_files.endswith(".mol2"):
        continue  # ignore anything that is not a ligand file

    file_path = os.path.join(A_mol2_folder, mol2_files)
    base_name = os.path.splitext(mol2_files)[0]
    docked_mol2_file = os.path.join(A_docked_folder, f"Docked_{base_name}.mol2")
    docked_score_file = os.path.join(A_scores_folder, f"Docked_{base_name}.txt")

    if os.path.exists(docked_mol2_file):
        print(f"Skipping {file_path}, {docked_mol2_file} already exists.")
        continue

    # Argument list instead of a shell string: file names derived from data can
    # no longer be split or interpreted by the shell.
    command = [
        "smina",
        "-r", MYC,
        "-l", file_path,
        "-o", docked_mol2_file,
        *smina_box_args,
        *smina_search_args,
    ]

    try:
        # Equivalent of the former "> score_file 2>&1" shell redirect.
        with open(docked_score_file, "w") as score_log:
            subprocess.run(command, stdout = score_log, stderr = subprocess.STDOUT, check = True)
        print(f"Successfully converted {file_path} to {docked_mol2_file}")
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: smina exited non-zero.
        # OSError: smina could not be launched or the score file could not be opened.
        print(f"Error processing {file_path}: {e}")


Successfully converted True_Actives/Mol2/Query_23.mol2 to True_Actives/Docked/Docked_Query_23.mol2
Successfully converted True_Actives/Mol2/Query_26.mol2 to True_Actives/Docked/Docked_Query_26.mol2
Successfully converted True_Actives/Mol2/Query_44.mol2 to True_Actives/Docked/Docked_Query_44.mol2
Successfully converted True_Actives/Mol2/Query_25.mol2 to True_Actives/Docked/Docked_Query_25.mol2
Successfully converted True_Actives/Mol2/Query_13.mol2 to True_Actives/Docked/Docked_Query_13.mol2
Successfully converted True_Actives/Mol2/Query_32.mol2 to True_Actives/Docked/Docked_Query_32.mol2
Successfully converted True_Actives/Mol2/Query_36.mol2 to True_Actives/Docked/Docked_Query_36.mol2
Successfully converted True_Actives/Mol2/Query_37.mol2 to True_Actives/Docked/Docked_Query_37.mol2
Successfully converted True_Actives/Mol2/Query_11.mol2 to True_Actives/Docked/Docked_Query_11.mol2
Successfully converted True_Actives/Mol2/Query_18.mol2 to True_Actives/Docked/Docked_Query_18.mol2
Successful