# Description

This notebook reads the prediction results generated with the `011-prediction-*` notebooks and computes the final performance measures using the gold standard (PharmacotherapyDB).

# Modules loading

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

import conf

# Settings

In [3]:
N_TISSUES = 49
N_THRESHOLDS = 5
N_PREDICTIONS = 646

In [4]:
OUTPUT_DIR = conf.RESULTS["DRUG_DISEASE_ANALYSES"] / "lincs"
display(OUTPUT_DIR)
assert OUTPUT_DIR.exists()

PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs')

In [5]:
OUTPUT_PREDICTIONS_DIR = Path(OUTPUT_DIR, "predictions", "dotprod_neg")
display(OUTPUT_PREDICTIONS_DIR)
OUTPUT_PREDICTIONS_DIR.mkdir(parents=True, exist_ok=True)

PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs/predictions/dotprod_neg')

# Load PharmacotherapyDB gold standard

In [6]:
gold_standard = pd.read_pickle(
    Path(conf.RESULTS["DRUG_DISEASE_ANALYSES"], "gold_standard.pkl"),
)

In [7]:
gold_standard.shape

(998, 3)

In [8]:
gold_standard.head()

Unnamed: 0,trait,drug,true_class
0,DOID:10652,DB00843,1
1,DOID:10652,DB00674,1
2,DOID:10652,DB01043,1
3,DOID:10652,DB00989,1
4,DOID:10652,DB00810,0


In [9]:
gold_standard["true_class"].value_counts()

1    755
0    243
Name: true_class, dtype: int64

In [10]:
gold_standard["true_class"].value_counts(normalize=True)

1    0.756513
0    0.243487
Name: true_class, dtype: float64

# Load drug-disease predictions

In [11]:
from collections import defaultdict

In [19]:
# get all prediction files

current_prediction_files = list(OUTPUT_PREDICTIONS_DIR.glob("*.h5"))
display(len(current_prediction_files))

assert len(current_prediction_files) == 2 * (
    N_TISSUES * N_THRESHOLDS
)  # two methods (single-gene and module-based)

9

AssertionError: 

In [13]:
current_prediction_files[:5]

[PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs/predictions/dotprod_neg/spredixcan-mashr-zscores-Adipose_Subcutaneous-data-all_genes-prediction_scores.h5'),
 PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs/predictions/dotprod_neg/spredixcan-mashr-zscores-Adipose_Subcutaneous-data-top_100_genes-prediction_scores.h5'),
 PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs/predictions/dotprod_neg/spredixcan-mashr-zscores-Adipose_Subcutaneous-data-top_250_genes-prediction_scores.h5'),
 PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs/predictions/dotprod_neg/spredixcan-mashr-zscores-Adipose_Subcutaneous-data-top_500_genes-prediction_scores.h5'),
 PosixPath('/media/miltondp/Elements1/projects/phenoplier/results/drug_disease_analyses/lincs/predictions/dotprod_neg/spredixcan-mashr-zscores-Adipose_Subcutaneous-data-top_50_

In [14]:
# iterate for each prediction file and perform some preprocessing
# each prediction file (.h5) has the predictions of one method (either module-based
# or gene-based) for all drug-disease pairs across all S-PrediXcan tissues

predictions = []

for f in tqdm(current_prediction_files, ncols=100):
    # get predictions and merge with gold standard, keeping only the drug-disease pairs present there
    prediction_data = pd.read_hdf(f, key="prediction")
    prediction_data = pd.merge(
        prediction_data, gold_standard, on=["trait", "drug"], how="inner"
    )

    # transform scores into ranks, and change the type of columns to save memory
    prediction_data["score"] = prediction_data["score"].rank()
    prediction_data["trait"] = prediction_data["trait"].astype("category")
    prediction_data["drug"] = prediction_data["drug"].astype("category")

    # read metadata
    metadata = pd.read_hdf(f, key="metadata")

    # add the "method" column
    prediction_data = prediction_data.assign(method=metadata.method.values[0])
    prediction_data["method"] = prediction_data["method"].astype("category")

    # add the "n_top_genes" columns, which indicates the top genes/LVs used for this prediction
    prediction_data = prediction_data.assign(n_top_genes=metadata.n_top_genes.values[0])

    # add the "data" column, which has the tissue name
    prediction_data = prediction_data.assign(data=metadata.data.values[0])
    prediction_data["data"] = prediction_data["data"].astype("category")

    predictions.append(prediction_data)

100%|█████████████████████████████████████████████████████████████| 490/490 [01:20<00:00,  6.05it/s]


In [15]:
assert np.all(pred.shape[0] == N_PREDICTIONS for pred in predictions)

In [16]:
predictions = pd.concat(predictions, ignore_index=True)

In [22]:
# extract the tissue name from the "data" column


def _get_tissue(x):
    if x.endswith("-projection"):
        return x.split("spredixcan-mashr-zscores-")[1].split("-projection")[0]
    else:
        return x.split("spredixcan-mashr-zscores-")[1].split("-data")[0]


predictions = predictions.assign(tissue=predictions["data"].apply(_get_tissue))

In [18]:
predictions.head()

Unnamed: 0,trait,drug,score,true_class,method,n_top_genes,data
0,DOID:0050741,DB00215,323.0,1,Gene-based,100.0,spredixcan-mashr-zscores-Pancreas-data
1,DOID:0050741,DB00704,230.0,1,Gene-based,100.0,spredixcan-mashr-zscores-Pancreas-data
2,DOID:0050741,DB00822,199.0,1,Gene-based,100.0,spredixcan-mashr-zscores-Pancreas-data
3,DOID:10283,DB00014,115.0,1,Gene-based,100.0,spredixcan-mashr-zscores-Pancreas-data
4,DOID:10283,DB00175,167.0,0,Gene-based,100.0,spredixcan-mashr-zscores-Pancreas-data


## Testing

In [17]:
display(predictions.shape)

assert predictions.shape[0] == 2 * (N_TISSUES * N_THRESHOLDS) * N_PREDICTIONS

(316540, 7)

In [19]:
assert not predictions.isna().any().any()

In [20]:
_tmp = predictions["method"].value_counts()
display(_tmp)

assert _tmp.loc["Gene-based"] == N_TISSUES * N_THRESHOLDS * N_PREDICTIONS
assert _tmp.loc["Module-based"] == N_TISSUES * N_THRESHOLDS * N_PREDICTIONS

Module-based    158270
Gene-based      158270
Name: method, dtype: int64

In [21]:
_tmp = predictions.groupby(["method", "n_top_genes"]).count()
display(_tmp)

assert np.all(_tmp == N_TISSUES * N_PREDICTIONS)

Unnamed: 0_level_0,Unnamed: 1_level_0,trait,drug,score,true_class,data
method,n_top_genes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Gene-based,-1.0,31654,31654,31654,31654,31654
Gene-based,50.0,31654,31654,31654,31654,31654
Gene-based,100.0,31654,31654,31654,31654,31654
Gene-based,250.0,31654,31654,31654,31654,31654
Gene-based,500.0,31654,31654,31654,31654,31654
Module-based,-1.0,31654,31654,31654,31654,31654
Module-based,5.0,31654,31654,31654,31654,31654
Module-based,10.0,31654,31654,31654,31654,31654
Module-based,25.0,31654,31654,31654,31654,31654
Module-based,50.0,31654,31654,31654,31654,31654


In [24]:
_tmp = predictions.groupby(["method", "tissue"]).count()
display(_tmp)

assert np.all(_tmp.loc["Gene-based"] == (N_PREDICTIONS * N_THRESHOLDS))
assert np.all(_tmp.loc["Module-based"] == (N_PREDICTIONS * N_THRESHOLDS))

Unnamed: 0_level_0,Unnamed: 1_level_0,trait,drug,score,true_class,n_top_genes,data
method,tissue,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Gene-based,Adipose_Subcutaneous,3230,3230,3230,3230,3230,3230
Gene-based,Adipose_Visceral_Omentum,3230,3230,3230,3230,3230,3230
Gene-based,Adrenal_Gland,3230,3230,3230,3230,3230,3230
Gene-based,Artery_Aorta,3230,3230,3230,3230,3230,3230
Gene-based,Artery_Coronary,3230,3230,3230,3230,3230,3230
...,...,...,...,...,...,...,...
Module-based,Testis,3230,3230,3230,3230,3230,3230
Module-based,Thyroid,3230,3230,3230,3230,3230,3230
Module-based,Uterus,3230,3230,3230,3230,3230,3230
Module-based,Vagina,3230,3230,3230,3230,3230,3230


In [25]:
# all prediction tables should have the same shape
predictions_shape = (
    predictions.groupby(["method", "n_top_genes", "tissue"])
    .apply(lambda x: x.shape)
    .unique()
)
display(predictions_shape)

assert predictions_shape.shape[0] == 1
assert predictions_shape[0][0] == N_PREDICTIONS

array([(646, 8)], dtype=object)

## Save

In [26]:
output_file = Path(OUTPUT_DIR, "predictions", "predictions_results.pkl").resolve()
display(output_file)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/results/drug_disease_analyses/lincs/predictions/predictions_results.pkl')

In [27]:
predictions.to_pickle(output_file)

# Aggregate predictions

Here I get summaries from all predictions as follows:

  1. Group by trait, drug, method, tissue, and average all scores across all gene/LVs thresholds. This is the same as it is done in the published method referenced before (the framework for drug-repositioning).
  1. Then, group by trait, drug, method and take the maximum score across all tissues. The rationale for this is that 1) we don't know which tissue might have more information for a particular disease, and 2) tissue-specific TWAS results are not meaningful to extract conclusions of real tissue-specific effects, since there is a lot of eQTL sharing across tissues.

These correspond to the final drug-disease predictions for each method.

In [28]:
def _reduce_mean(x):
    return pd.Series(
        {"score": x["score"].mean(), "true_class": x["true_class"].unique()[0]}
    )


def _reduce_max(x):
    return pd.Series(
        {"score": x["score"].max(), "true_class": x["true_class"].unique()[0]}
    )

In [29]:
predictions_avg = (
    # average across n_top_genes
    predictions.groupby(["trait", "drug", "method", "tissue"])
    .apply(_reduce_mean)
    .dropna()
    # take maximum across tissues
    .groupby(["trait", "drug", "method"])
    .apply(_reduce_max)
    .dropna()
    .sort_index()
    .reset_index()
)

In [30]:
# predictions_avg should have twice the number of rows in the predictions table, since has both methods
display(predictions_avg.shape)
assert predictions_avg.shape[0] == int(predictions_shape[0][0] * 2)

(1292, 5)

In [31]:
assert predictions_avg.dropna().shape == predictions_avg.shape

In [32]:
predictions_avg.head()

Unnamed: 0,trait,drug,method,score,true_class
0,DOID:0050741,DB00215,Gene-based,359.0,1.0
1,DOID:0050741,DB00215,Module-based,437.8,1.0
2,DOID:0050741,DB00704,Gene-based,395.8,1.0
3,DOID:0050741,DB00704,Module-based,562.8,1.0
4,DOID:0050741,DB00822,Gene-based,556.8,1.0


## Save

In [33]:
output_file = Path(
    OUTPUT_DIR, "predictions", "predictions_results_aggregated.pkl"
).resolve()
display(output_file)

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier/base/results/drug_disease_analyses/lincs/predictions/predictions_results_aggregated.pkl')

In [34]:
predictions_avg.to_pickle(output_file)

# ROC

In [35]:
from sklearn.metrics import roc_auc_score

## Predictions

In [36]:
# AUROC by method/n_top_genes
predictions.groupby(["method", "tissue", "n_top_genes"]).apply(
    lambda x: roc_auc_score(x["true_class"], x["score"])
).groupby(["method", "n_top_genes"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
method,n_top_genes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Gene-based,-1.0,49.0,0.537316,0.022095,0.488657,0.524654,0.539769,0.549363,0.591238
Gene-based,50.0,49.0,0.521783,0.022669,0.471299,0.509223,0.519465,0.53418,0.57867
Gene-based,100.0,49.0,0.526877,0.018609,0.488313,0.513366,0.526554,0.541049,0.566846
Gene-based,250.0,49.0,0.540346,0.021609,0.500138,0.529472,0.538378,0.560486,0.579923
Gene-based,500.0,49.0,0.539265,0.020471,0.505479,0.525439,0.537566,0.551318,0.598107
Module-based,-1.0,49.0,0.55097,0.021875,0.488396,0.53685,0.552254,0.565001,0.599869
Module-based,5.0,49.0,0.546616,0.022894,0.483261,0.528784,0.549019,0.561422,0.605431
Module-based,10.0,49.0,0.549093,0.023742,0.478016,0.536479,0.546349,0.560335,0.612864
Module-based,25.0,49.0,0.546519,0.02897,0.490034,0.523759,0.544105,0.567382,0.611074
Module-based,50.0,49.0,0.546046,0.026593,0.485353,0.531936,0.545826,0.562413,0.596345


In [37]:
# AUROC by method/tissue
predictions.groupby(["method", "tissue", "n_top_genes"]).apply(
    lambda x: roc_auc_score(x["true_class"], x["score"])
).groupby(["method", "tissue"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
method,tissue,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Gene-based,Adipose_Subcutaneous,5.0,0.536630,0.013501,0.520180,0.530449,0.531317,0.549707,0.551497
Gene-based,Adipose_Visceral_Omentum,5.0,0.518418,0.022685,0.485064,0.504405,0.533285,0.534180,0.535157
Gene-based,Adrenal_Gland,5.0,0.524940,0.019282,0.499381,0.509223,0.535240,0.539769,0.541090
Gene-based,Artery_Aorta,5.0,0.507475,0.005750,0.501845,0.503331,0.504887,0.513284,0.514027
Gene-based,Artery_Coronary,5.0,0.545325,0.005630,0.539053,0.541049,0.544628,0.549295,0.552598
...,...,...,...,...,...,...,...,...,...
Module-based,Testis,5.0,0.552882,0.016990,0.534703,0.541737,0.548937,0.561422,0.577610
Module-based,Thyroid,5.0,0.559365,0.008815,0.550933,0.551979,0.557334,0.565001,0.571581
Module-based,Uterus,5.0,0.537946,0.017693,0.516464,0.523759,0.539342,0.554856,0.555310
Module-based,Vagina,5.0,0.542282,0.017486,0.514660,0.540980,0.541779,0.553452,0.560541


## Predictions summaries

In [38]:
predictions_avg.head()

Unnamed: 0,trait,drug,method,score,true_class
0,DOID:0050741,DB00215,Gene-based,359.0,1.0
1,DOID:0050741,DB00215,Module-based,437.8,1.0
2,DOID:0050741,DB00704,Gene-based,395.8,1.0
3,DOID:0050741,DB00704,Module-based,562.8,1.0
4,DOID:0050741,DB00822,Gene-based,556.8,1.0


In [39]:
predictions_avg.groupby(["method"]).apply(
    lambda x: roc_auc_score(x["true_class"], x["score"])
)

method
Gene-based      0.565565
Module-based    0.632101
dtype: float64

These are the final performance measures using AUROC.

# PR

In [40]:
from sklearn.metrics import average_precision_score

## Predictions

In [41]:
# Average precision by method/n_top_genes
predictions.groupby(["method", "tissue", "n_top_genes"]).apply(
    lambda x: average_precision_score(x["true_class"], x["score"])
).groupby(["method", "n_top_genes"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
method,n_top_genes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Gene-based,-1.0,49.0,0.821141,0.012115,0.791883,0.8157,0.823089,0.826527,0.845993
Gene-based,50.0,49.0,0.812367,0.011124,0.789323,0.805338,0.81095,0.818964,0.837116
Gene-based,100.0,49.0,0.815802,0.009807,0.795174,0.807916,0.815802,0.823319,0.833513
Gene-based,250.0,49.0,0.822883,0.011527,0.791251,0.816006,0.821466,0.829536,0.849501
Gene-based,500.0,49.0,0.822643,0.009925,0.803506,0.815523,0.822377,0.828614,0.844589
Module-based,-1.0,49.0,0.825578,0.010326,0.803155,0.818189,0.825569,0.832833,0.850523
Module-based,5.0,49.0,0.824127,0.011438,0.79578,0.815947,0.827801,0.8319,0.851023
Module-based,10.0,49.0,0.824383,0.0125,0.795728,0.817049,0.823271,0.831864,0.849924
Module-based,25.0,49.0,0.821462,0.01499,0.794916,0.810213,0.820894,0.830868,0.853334
Module-based,50.0,49.0,0.822552,0.015475,0.778559,0.815136,0.824726,0.831564,0.851906


In [42]:
# Average precision by method/tissue
predictions.groupby(["method", "tissue", "n_top_genes"]).apply(
    lambda x: average_precision_score(x["true_class"], x["score"])
).groupby(["method", "tissue"]).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
method,tissue,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Gene-based,Adipose_Subcutaneous,5.0,0.824818,0.009010,0.814039,0.819139,0.822745,0.833728,0.834440
Gene-based,Adipose_Visceral_Omentum,5.0,0.814407,0.013335,0.796885,0.803859,0.819274,0.825769,0.826248
Gene-based,Adrenal_Gland,5.0,0.812982,0.006435,0.804577,0.807679,0.816006,0.817753,0.818894
Gene-based,Artery_Aorta,5.0,0.805476,0.003914,0.800970,0.803506,0.804125,0.807866,0.810915
Gene-based,Artery_Coronary,5.0,0.824009,0.003853,0.819936,0.820489,0.824517,0.825967,0.829138
...,...,...,...,...,...,...,...,...,...
Module-based,Testis,5.0,0.829219,0.009344,0.816903,0.825178,0.828267,0.833967,0.841779
Module-based,Thyroid,5.0,0.829913,0.004428,0.822062,0.831268,0.831579,0.831826,0.832833
Module-based,Uterus,5.0,0.812736,0.012707,0.798563,0.800676,0.814974,0.822915,0.826553
Module-based,Vagina,5.0,0.811183,0.013788,0.792978,0.805208,0.810304,0.817441,0.829982


## Predictions summaries

In [43]:
predictions_avg.groupby(["method"]).apply(
    lambda x: average_precision_score(x["true_class"], x["score"])
)

method
Gene-based      0.838652
Module-based    0.857572
dtype: float64

These are the final performance measures using average precision.