In [59]:
import pandas as pd
import numpy as np
from glob import glob
import sys
import os

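We will use the following helper functions to load the per-protein docking score CSV files and merge them into a single dataframe of ligands × proteins.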

In [114]:
def _get_the_dataframe(dir_path):
    """Concatenate every score table found in ``dir_path`` into one wide dataframe.

    Parameters
    ----------
    dir_path : str
        Directory holding the CSV files, with or without a trailing path
        separator. A value that is not an existing directory is used as a raw
        glob prefix (``dir_path + '*'``), which is the historical behaviour.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of all files, indexed by ``'ligand'``
        (read from the ``'Ligando'`` column of each file), with the
        ``'DkScore_'`` prefix stripped from the column names.

    Raises
    ------
    AssertionError
        If no file matches.
    """
    if os.path.isdir(dir_path):
        # Works whether or not the caller supplied the trailing separator.
        pattern = os.path.join(dir_path, '*')
    else:
        pattern = dir_path + '*'
    # Sub-directories cannot be parsed by read_csv, so keep regular files only.
    list_files = [path for path in glob(pattern) if os.path.isfile(path)]
    assert list_files, f'No docking result files match {pattern!r}'

    def _sort_key(path):
        file_name = os.path.basename(path)
        # Primary key: text before the first underscore of the file name
        # (plain string comparison). The full file name breaks ties so the
        # column order does not depend on the arbitrary order glob returns.
        return file_name.split('_')[0], file_name

    list_files.sort(key=_sort_key)

    list_of_dfs = [pd.read_csv(path, index_col='Ligando') for path in list_files]
    df = pd.concat(list_of_dfs, axis=1)
    df.index.names = ['ligand'] # Rename the index
    # Strip the literal prefix; regex=False keeps this independent of the
    # pandas-version-specific default of ``str.replace``.
    df.columns = df.columns.str.replace('DkScore_', '', regex=False)
    return df

def process_smina_docking_results(dir_path, mol_library, docking_tool, save = True):
    """Build the ligands x proteins score table and optionally save it as CSV.

    Parameters
    ----------
    dir_path : str
        Directory with the per-file score tables (see ``_get_the_dataframe``).
    mol_library : str
        Library label; only used to compose the output file name.
    docking_tool : str
        Docking tool label; only used to compose the output file name.
    save : bool, default True
        When true, write the table to
        ``./{mol_library}_{docking_tool}_{n_cols}_prots_{m_mols}_mols.csv``
        in the current working directory.

    Returns
    -------
    pandas.DataFrame
        The score columns plus an ``'activity'`` column (1 when the index
        label contains ``'ligand'``, else 0). Rows are sorted with
        ``activity == 1`` first and, within each group, by the integer that
        follows the last underscore of the index label.

    Raises
    ------
    ValueError
        If an index label does not end in ``_<integer>``.

    Notes
    -----
    ``n_cols`` in the file name is the total column count of the returned
    frame, so it includes the ``'activity'`` column. The name is kept as-is
    because other code may rely on it.
    """
    scores = _get_the_dataframe(dir_path)
    # Activity flag and numeric suffix are both derived from the index labels.
    activity = [1 if 'ligand' in name else 0 for name in scores.index]
    order = [int(name.split('_')[-1]) for name in scores.index]
    # 'order' is a temporary sort key and is dropped again; sorting on columns
    # keeps the 'ligand' index attached to its rows, so no reset is needed.
    df = (
        scores
        .assign(activity=activity, order=order)
        .sort_values(['activity', 'order'], ascending=[False, True])
        .drop(columns='order')
    )
    # Save the dataframe as csv
    n_cols = df.shape[1]
    m_mols = df.shape[0]
    print('Shape of the final df:', df.shape)
    if save:
        df.to_csv(f'./{mol_library}_{docking_tool}_{n_cols}_prots_{m_mols}_mols.csv')
    return df

### DEKOIS 
#### VINARDO

In [116]:
# Library / docking-tool labels; they select the input directory below and
# are also used by process_smina_docking_results to name the saved CSV.
mol_library, docking_tool = 'DEKOIS2', 'VINARDO'
# Directory holding the score CSV files for this library and tool
DEKOIS_PATH = f'../../FILES/CRYSTALS/DOKINGS/DOCK_136_crys_{mol_library}/{docking_tool}/CSV/'

# Build the table (save defaults to True, so a CSV is written to the working directory)
df_dekois_vrd = process_smina_docking_results(
    dir_path=DEKOIS_PATH,
    mol_library=mol_library,
    docking_tool=docking_tool,
)

Shape of the final df: (1240, 137)


### DUD 
#### VINARDO

In [118]:
# Library / docking-tool labels; they select the input directory below and
# are also used by process_smina_docking_results to name the saved CSV.
mol_library, docking_tool = 'DUD2006', 'VINARDO'
# Directory holding the score CSV files for this library and tool
DUD_PATH = f'../../FILES/CRYSTALS/DOKINGS/DOCK_136_crys_{mol_library}/{docking_tool}/CSV/'

# Build the table (save defaults to True, so a CSV is written to the working directory)
df_dud_vrd = process_smina_docking_results(
    dir_path=DUD_PATH,
    mol_library=mol_library,
    docking_tool=docking_tool,
)

Shape of the final df: (5891, 137)
