In [1]:
%load_ext autoreload
%autoreload 2

In [27]:
import os
import numpy as np
import pandas as pd
import pingouin as pg
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

import ensembling
import metrics
import parsing
import plotting_utils
import postprocessing

In [31]:
# Root directory that holds the outputs of every method.
MAIN_PATH = '../data/outputs'


def _output_path(*parts):
    """Join ``parts`` onto ``MAIN_PATH`` and return the resulting path."""
    return os.path.join(MAIN_PATH, *parts)


# Reduce function used to average across targets. nanmedian is used because
# there is some issue with RF/NN-score.
AVG_FUN = np.nanmedian
# Reduce function applied when a protein-ligand pair has multiple entries
# (e.g. Vina, docking, RF/NN-score etc.).
PROTEIN_LIGAND_PAIR_REDUCE = 'max'
# Columns that together specify a unique protein-ligand pair.
PAIR_ID_COLS = ['target_id', 'ligand_id']

# RF-score and NN-score are both read from the same directory.
_rfnn_scores_dir = _output_path('rfnn_dude_scores')

# Method name -> location of its results. The two DENVIS entries are nested
# one level deeper, keyed by "atom" / "surface".
PATH_RESULTS = {
    "DENVIS-G": {
        "atom": _output_path('denvis_outputs', 'dude_main_general_atom.parquet'),
        "surface": _output_path('denvis_outputs', 'dude_main_general_surface.parquet'),
    },
    "DENVIS-R": {
        "atom": _output_path('denvis_outputs', 'dude_main_refined_atom.parquet'),
        "surface": _output_path('denvis_outputs', 'dude_main_refined_surface.parquet'),
    },
    "Ligand bsl.": _output_path('denvis_outputs', 'dude_main_refined_ligand_bsl.parquet'),  # TODO
    "GNINA": _output_path('gnina_outputs', 'newdefault_CNNscore-max.summary'),
    "Vina": _output_path('vina_outputs', 'dude.csv'),  # TODO
    "RF-score": _rfnn_scores_dir,  # TODO
    "NN-score": _rfnn_scores_dir,  # TODO
    "Deep DTA": _output_path('deepdta_outputs', 'dude_trained_pdbbind.json'),  # TODO
}

# Network output combination kwargs for the refined/general models.
# The two entries differ only in `y_ic50_weight`.
OUTPUT_COMBINATION_KWS = {
    'refined': dict(
        y_aff_weight=0.,
        y_kd_weight=0.5,
        y_ki_weight=0.5,
        y_ic50_weight=0.0,
        use_clf=False,
        clf_strategy=None,
    ),
    'general': dict(
        y_aff_weight=0.,
        y_kd_weight=0.5,
        y_ki_weight=0.5,
        y_ic50_weight=0.5,
        use_clf=False,
        clf_strategy=None,
    ),
}

In [None]:
# Parse the raw outputs of every method into one `results_*` variable each.
#
# Result locations are looked up in the PATH_RESULTS mapping defined in the
# configuration cell. The flat names this cell used before
# (PATH_RESULTS_REFINED_ATOM, PATH_RESULTS_VINA, ...) are not defined in the
# configuration, so the cell could not run on a fresh kernel.
#
# NOTE(review): DATASET, PATH_RESULTS_GENERAL_LIGAND and PATH_RESULTS_DOCKING
# have no counterpart in the visible configuration (PATH_RESULTS only has the
# refined ligand baseline and no docking entry). Their values cannot be
# inferred here, so they are checked up front and must be added to the
# configuration cell.
_needed_from_config = ('DATASET', 'PATH_RESULTS_GENERAL_LIGAND', 'PATH_RESULTS_DOCKING')
_not_defined = [name for name in _needed_from_config if name not in globals()]
if _not_defined:
    # Fail before any parsing starts instead of part-way through the cell.
    raise NameError(
        "Define the following in the configuration cell before parsing results: "
        + ", ".join(_not_defined)
    )

# Not filled in this cell; kept in case later cells use it -- TODO confirm.
results = dict()


def _parse_denvis_ensemble(path):
    """Parse one DENVIS parquet output and compute its ensemble scores.

    Args:
        path: Location of the DENVIS output file.

    Returns:
        The value returned by ``ensembling.compute_ensemble_scores`` for the
        parsed results (the second value returned by
        ``parsing.parse_results_denvis`` is discarded).
    """
    parsed, _ = parsing.parse_results_denvis(path, dataset=DATASET, fmt='parquet')
    return ensembling.compute_ensemble_scores(
        parsed, ckpt=True, version=True, pair_id_cols=PAIR_ID_COLS)


def _parse_docking(method):
    """Parse the docking results of ``method`` ('Glide', 'Surflex', 'Flexx' or 'Gold')."""
    return parsing.parse_results_docking(
        PATH_RESULTS_DOCKING, dataset=DATASET, method=method, reduce=PROTEIN_LIGAND_PAIR_REDUCE)


# --- DENVIS (refined models) ---
print("Parsing atom-level results (refined)...")
results_refined_atom = _parse_denvis_ensemble(PATH_RESULTS["DENVIS-R"]["atom"])

print("Parsing surface-level results (refined)...")
results_refined_surface = _parse_denvis_ensemble(PATH_RESULTS["DENVIS-R"]["surface"])

print("Parsing ligand baseline results (refined)...")
# "Ligand bsl." points at the *refined* ligand-baseline file.
results_refined_ligand = _parse_denvis_ensemble(PATH_RESULTS["Ligand bsl."])

# --- DENVIS (general models) ---
print("Parsing atom-level results (general)...")
results_general_atom = _parse_denvis_ensemble(PATH_RESULTS["DENVIS-G"]["atom"])

print("Parsing surface-level results (general)...")
results_general_surface = _parse_denvis_ensemble(PATH_RESULTS["DENVIS-G"]["surface"])

print("Parsing ligand baseline results (general)...")
results_general_ligand = _parse_denvis_ensemble(PATH_RESULTS_GENERAL_LIGAND)

# --- Other methods ---
print("Parsing DeepDTA results...")
results_deep_dta = parsing.parse_results_deeppurpose(PATH_RESULTS["Deep DTA"], dataset=DATASET)

print("Parsing Vina results...")
results_vina = parsing.parse_vina(PATH_RESULTS["Vina"], dataset=DATASET, reduce=PROTEIN_LIGAND_PAIR_REDUCE)

print("Parsing GNINA results...")
results_gnina = parsing.parse_results_gnina(PATH_RESULTS["GNINA"], dataset=DATASET)

# NN-score and RF-score are read from the same directory.
print("Parsing NN-score results...")
results_nn_score = parsing.parse_results_nn_score(
    PATH_RESULTS["NN-score"], dataset=DATASET, reduce=PROTEIN_LIGAND_PAIR_REDUCE, prog_bar=True)
print("Parsing RF-score results...")
results_rf_score = parsing.parse_results_rf_score(
    PATH_RESULTS["RF-score"], dataset=DATASET, reduce=PROTEIN_LIGAND_PAIR_REDUCE, prog_bar=True)

# --- Docking programs (all read from PATH_RESULTS_DOCKING) ---
print("Parsing Glide results...")
results_glide = _parse_docking('Glide')
print("Parsing Surflex results...")
results_surflex = _parse_docking('Surflex')
print("Parsing Flexx results...")
results_flexx = _parse_docking('Flexx')
print("Parsing Gold results...")
results_gold = _parse_docking('Gold')

print('Done.')