## Compute Consensus Scoring using Protein Conformational Selection

In [1]:
import pandas as pd
import numpy as np
import glob, os, sys
sys.path.append('..')
from modules.run_or_load_decorator import run_or_load

### Load the data
Load the data frame containing the raw docking scoring results.

In [2]:
# Pickled data frame with the raw docking scoring results plus the 'activity' column
file_name = './df_DkSc_results_COCRYS_CSAR_DEKOIS_DUD.pkl'
X_merged_dksc = pd.read_pickle(file_name)
# Keep the 'activity' column aside as its own Series
y_true_merged = X_merged_dksc['activity']
# Remove the 'activity' column from X_merged_dksc, leaving the remaining columns
X_merged_dksc = X_merged_dksc.drop(columns='activity')
# Show (rows, columns) of the resulting frame
X_merged_dksc.shape

(3466, 402)

### Load the Murcko scaffolds data frame used for train/test splitting
Load the Murcko scaffolds data frame, which is needed to perform scaffold-based splitting.

In [3]:
# Add the docking-analysis folder to the import path so that scaffold_splitter
# (imported on the next line) can be found -- presumably the module lives there
sys.path.append('../2_Docking_analysis/')
from scaffold_splitter import train_test_scaffold_split

# Load the pickled dataframe containing the Generic Murcko Scaffolds
# (this cell only reads the existing file; nothing is computed here)
file = '../2_Docking_analysis/df_COCRYS_CSAR_DUD_DEKOIS_Murcko_Scaffolds_SMILES.obj'

df_scff_murcko = pd.read_pickle(file)
# Show (rows, columns) of the loaded frame
df_scff_murcko.shape

(3466, 3)

### Read RFE Selectors to get the preselected features

In [4]:
# Execute the helper notebook; names it defines become available in this notebook.
# The next cell reads `rfe_preselections`, which is presumably created here -- TODO confirm.
%run ./5_Helper_get_RFE_preselected_conformations.ipynb

In [5]:
# Unlike the ML results, where the selectors are a list, here the preselections are stored
# in a dictionary that maps each selector name to its preselected conformations
rfe_preselections.keys()

dict_keys(['LR_rand', 'RF_rand', 'XGB_rand', 'LR_scff', 'RF_scff', 'XGB_scff'])

### Import plotmetrics module to evaluate docking results

In [6]:
# Execute the plotting/metrics module so that the names it defines are available in this notebook
%run ../modules/plotting_metrics.py

### Import Consensus Scoring Related Functions

In [7]:
# Execute the consensus-scoring helper notebook. Presumably this is where get_rank_by_number,
# get_rank_by_rank, get_best_score, get_exp_consensus_ranking and
# aggregate_conf_selection_results_CS (all used in later cells) come from -- TODO confirm.
%run ./5_Helper_Consensus_Scoring.ipynb

### List of parameters to evaluate Consensus scoring

In [8]:
# Splitting schemes to evaluate:
#   'none'          -> consensus scoring is performed using all molecules
#   'scff' / 'rand' -> the consensus scoring method is applied over the test set
splitting_methods = ['none', 'scff', 'rand']

# The 'scff_generic' column of the Murcko scaffolds dataframe
scaffold_series = df_scff_murcko['scff_generic']

# Conformation selectors
selectors = ['rand', 'LR', 'RF', 'XGB']

# Consensus scoring methods, keyed by their short label
consensus_methods = dict(
    RbN=get_rank_by_number,
    RbR=get_rank_by_rank,
    BS=get_best_score,
    ECR=get_exp_consensus_ranking,
)

# Parameter sets, one per metric to compute
roc_params = dict(metric_name='roc_auc')
nef_params = dict(metric_name='nef_auc')
pr_params = dict(metric_name='pr_auc')

# The Ra value for the testing set in FXa is 75/1559 = 0.05,
# therefore the maximum value of alpha for bedroc could be a=20.
# NOTE(review): this note refers to FXa, while the run cell of this notebook uses
# prot_name = 'CDK2' -- confirm that the ratio (and the alpha limit) hold for this data set.
bedroc_20 = dict(metric_name='bedroc', alpha=20)
bedroc_10 = dict(metric_name='bedroc', alpha=10)
bedroc_2 = dict(metric_name='bedroc', alpha=2)
bedroc_05 = dict(metric_name='bedroc', alpha=0.5)

# ef fractions defined here: 0.001, 0.005, 0.02 and 0.2
# NOTE(review): the previous version of this comment also listed 0.1, but no entry with
# fraction 0.1 exists -- confirm whether it was dropped on purpose.
ef_0001 = dict(metric_name='ef', fraction=0.001)
ef_0005 = dict(metric_name='ef', fraction=0.005)
ef_002 = dict(metric_name='ef', fraction=0.02)
ef_02 = dict(metric_name='ef', fraction=0.2)

# Full list of metrics, in the order they are evaluated
metrics = [
    roc_params, nef_params, pr_params,
    bedroc_20, bedroc_10, bedroc_2, bedroc_05,
    ef_0001, ef_0005, ef_002, ef_02,
]

# Run Analysis

In [None]:
%%time

prot_name = 'CDK2'
base_filename = f'./consensus_scoring_results/{prot_name}_CS_results_conformational_selection'

for metric_eval in metrics:
    metric_name = '_'.join([str(i) for i in metric_eval.values()])
    print(metric_name)
    df = aggregate_conf_selection_results_CS(f'{base_filename}-{metric_name}.obj', 
                                             X, y, 
                                             splitting_methods=splitting_methods, 
                                             selectors=selectors,
                                             cs_methods=consensus_methods, 
                                             metrics=[metric_eval], 
                                             nreps=15, 
                                             scaffold_series=scaffold_series)

roc_auc
File loaded: ./consensus_scoring_results/CDK2_CS_results_conformational_selection-roc_auc.obj
nef_auc
File loaded: ./consensus_scoring_results/CDK2_CS_results_conformational_selection-nef_auc.obj
pr_auc
File loaded: ./consensus_scoring_results/CDK2_CS_results_conformational_selection-pr_auc.obj
bedroc_20
none/rand/RbN/bedroc
none/rand/RbR/bedroc
