In [1]:
# Import libraries
import pandas as pd
pd.set_option('display.max_columns', None)
import os

In [2]:
def build_full_path(insilico=False, protbound=False,
                    ATP_site_only=False, peripheral_only=False,
                    Ro5_Filtering=False, Ro3_Filtering=False):
    """
    Build the descriptor directory path for the selected analysis variant.

    The returned path has the form
    ``Analysis/Descriptors/<conformation>/<binding site>/<filter>``.

    Parameters:
    insilico        (bool):            Use the descriptors computed from the in silico generated conformation(s).
    protbound       (bool):            Use the descriptors computed from the protein-bound conformation(s).
    ATP_site_only   (bool, optional):  Optionally filter for ATP site ligands only.   Defaults to False.
    peripheral_only (bool, optional):  Optionally filter for peripheral ligands only. Defaults to False.
    Ro5_Filtering   (bool, optional):  Optionally filter by Ro5 criteria. Defaults to False.
    Ro3_Filtering   (bool, optional):  Optionally filter by Ro3 criteria. Defaults to False.
                                       Takes precedence over Ro5_Filtering if both are set.

    Returns:
    str: The assembled directory path. The directory is NOT created; a notice
         is printed if it does not exist.

    Raises:
    ValueError: If not exactly one of `insilico` / `protbound` is set.
    """

    parent_folder = os.path.join('Analysis', 'Descriptors')

    # In silico generated OR protein-bound conformations (exactly one must be chosen)
    if insilico and not protbound:
        path = os.path.join(parent_folder, 'in_silico_Generated_Conformations')
    elif protbound and not insilico:
        path = os.path.join(parent_folder, 'Protein_Bound_Conformations')
    else:
        # Fail early with a clear message instead of hitting an unbound `path` below
        raise ValueError("Set either insilico or protbound parameter to 'True'.")

    # Optionally: restrict to ATP-site or peripheral ligands.
    # Setting both (or neither) falls back to the binding-site agnostic folder.
    if ATP_site_only and not peripheral_only:
        folder = 'ATPsite'
    elif peripheral_only and not ATP_site_only:
        folder = 'Peripheral'
    else:
        folder = 'Binding_site_agnostic'

    # Optionally: restrict to Ro3- or Ro5-compliant molecules.
    # A single chain is required so that the "All" fallback cannot overwrite "Ro5".
    if Ro3_Filtering:
        subfolder = "Ro3"
    elif Ro5_Filtering:
        subfolder = "Ro5"
    else:
        subfolder = "All"

    # Build the full path
    full_path = os.path.join(path, folder, subfolder)

    # Only report a missing directory; it is not created here
    if not os.path.exists(full_path):
        print("Path does not exist.")

    print(f"Working directory = {full_path}")

    return full_path

# ADJUST HERE FOR THE DIFFERENT VARIANTS


In [3]:
# Select the analysis variant; change the flags below to switch to another one
full_path = build_full_path(
    insilico=True,
    protbound=False,
    ATP_site_only=False,
    peripheral_only=False,
    Ro5_Filtering=False,
    Ro3_Filtering=False,
)

Working directory = Analysis/Descriptors/in_silico_Generated_Conformations/Binding_site_agnostic/All


In [4]:
# Load the descriptor table, restricted to the identifier and descriptor columns listed in `cols`
cols = [
    'Dataset', 'ID',
    'NP-Likeness', 'Fsp3', 'FCStereo', 'nSPS', 'nPBF', 'ΣNPR',
]
df = pd.read_excel(
    os.path.join(full_path, 'Averaged_Descriptor_Values.xlsx'),
    usecols=cols,
)

In [5]:
def print_minmax_values(datasets=None, i=7, ascending=False, filename=None):
    """
    Write the `i` most extreme molecules per dataset and descriptor to a text file.

    For every dataset and every descriptor, the rows of the module-level `df`
    are sorted by (descriptor, ID) and the first `i` rows are written as a
    small table with the columns ``ID`` and the descriptor, the latter
    formatted as ``(x.xxx)``.

    Parameters:
    datasets  (iterable, optional): Dataset names to report. Defaults to all unique
                                    values of df['Dataset'], looked up at call time.
    i         (int, optional):      Number of rows per dataset/descriptor. Defaults to 7.
    ascending (bool, optional):     False -> largest values first (maxima),
                                    True  -> smallest values first (minima). Defaults to False.
    filename  (str):                Name of the output file, created inside `full_path`.

    Raises:
    TypeError: If `filename` is not provided.
    """
    if filename is None:
        raise TypeError("print_minmax_values() requires a 'filename' argument.")

    # Resolve the default at call time so a reloaded `df` is picked up
    if datasets is None:
        datasets = df['Dataset'].unique()

    descriptors = ['NP-Likeness', 'Fsp3', 'FCStereo', 'nSPS', 'nPBF', 'ΣNPR']
    filepath = os.path.join(full_path, filename)

    # Explicit UTF-8: the 'ΣNPR' column header is not encodable in every locale default
    with open(filepath, 'w', encoding='utf-8') as f:
        for dataset in datasets:
            dataset_df = df[df['Dataset'] == dataset]
            for desc in descriptors:
                # .copy() so the formatted column is written to an independent frame
                extremes = (
                    dataset_df[['ID', desc]]
                    .sort_values(by=[desc, 'ID'], ascending=ascending)
                    .head(i)
                    .reset_index(drop=True)
                    .copy()
                )
                extremes[desc] = extremes[desc].apply(lambda x: f"({x:.3f})")

                f.write(f"\n\n\n{dataset}\n")
                f.write(extremes.to_string(index=False))
                f.write("\n")

# Write max. values to .txt file

In [6]:
# Descending sort: the 7 largest values per dataset and descriptor
print_minmax_values(
    filename="Max_Descriptor_Values.txt",
    ascending=False,
    i=7,
)

# Write min. values to .txt file

In [7]:
# Ascending sort: the 7 smallest values per dataset and descriptor
print_minmax_values(
    filename="Min_Descriptor_Values.txt",
    ascending=True,
    i=7,
)