In [1]:
%load_ext autoreload
%autoreload 2



from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score
# noinspection PyUnresolvedReferences
from hcve_lib.tracking import load_run_results
from hcve_lib.utils import notebook_init, run_parallel
from hcve_lib.metrics import precision_recall_curve_with_confusion
from hcve_lib.evaluation_functions import map_inverse_weight

# Project-level notebook initialisation from hcve_lib.utils (what it configures is not visible in this notebook).
notebook_init()


from notebooks.deps.binary_predictive_performance import run_roc_analysis, get_pr_analysis
from notebooks.deps.binary_predictive_performance import get_pr_analysis, get_pr_analysis_ci, plot_pr_ci

from mlflow import set_tracking_uri
from notebooks.deps.config import TIME_POINT_PREDICTION
from deps.common import get_data_cached

from sklearn.metrics import roc_curve
from pandas import DataFrame
from plotly.graph_objs import Figure

from deps.constants import RANDOM_STATE
from hcve_lib.evaluation_functions import average_group_scores, merge_standardize_prediction, merge_predictions, \
    compute_metrics_prediction
from hcve_lib.metrics import BootstrappedMetric
from hcve_lib.tracking import load_group_results
from hcve_lib.visualisation import setup_plotly_style
from hcve_lib.functional import t
import numpy as np
from hcve_lib.metrics import statistic_from_bootstrap
from hcve_lib.functional import reject_none
from plotly import express as px
from hcve_lib.utils import transpose_list
from notebooks.deps.binary_predictive_performance import run_pr_analysis_ci

from config import GROUPS_LCO, GROUPS_10_fold
from hcve_lib.metrics import BinaryMetricFromScore
from hcve_lib.data import binarize_event

# Point MLflow at the tracking server; assumes one is reachable on localhost:5000 — TODO confirm for your setup.
set_tracking_uri('http://localhost:5000')



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
from plotly.offline import init_notebook_mode, iplot
# Enable plotly's offline rendering inside the notebook (called with connected=True).
init_notebook_mode(connected=True)  

In [3]:
# Load the dataset through the project's cached loader. Later cells read data['STUDY'] and
# y['data']['label']; the full structure of the four values is defined by get_data_cached.
data, metadata, X, y = get_data_cached()


[Memory]0.1s, 0.0min    : Loading get_data...
____________________________________________get_data cache loaded - 0.0s, 0.0min


In [4]:
from hcve_lib.functional import dict_subset

# Restrict the GROUPS_LCO mapping to the four methods compared in this notebook.
GROUPS = dict_subset(
    ['gb', 'coxnet', 'stacking', 'pcp_hf'],
    GROUPS_LCO,
)

In [5]:
# Prediction horizon in days: 10 years of 365 days. This assignment replaces the
# TIME_POINT_PREDICTION imported from notebooks.deps.config in the first cell.
TIME_POINT_PREDICTION = 365 * 10
# Value passed as `iterations=` to the PR-analysis helpers below.
ITERATIONS = 100

## Counting cases/controls with binarization at end-point

### Total

In [6]:
# Count label values before and after binarizing the outcome at the prediction
# horizon. binarize_event is called with drop_censored=False; value_counts()
# then leaves NaN entries out of the counts.
binarized = binarize_event(TIME_POINT_PREDICTION, y['data'], drop_censored=False)
counts_before = y['data']['label'].value_counts()
counts_after = binarized.value_counts(dropna=True)

print('Before')
print(counts_before.to_dict())
print('↓')
print(f'Binarization {TIME_POINT_PREDICTION/365} years (NA removed)')
# fill_value=0 keeps the difference defined when a label value is absent on one
# side (a plain subtraction would yield NaN there); astype(int) keeps counts integral.
print(counts_after.sub(counts_before, fill_value=0).astype(int).to_dict())
print('↓')
print(counts_after.to_dict())


Before
{0.0: 29286, 1.0: 1068}
↓
Binarization 10.0 years (NA removed)
{0.0: -27294, 1.0: -177}
↓
{0.0: 1992, 1.0: 891}






In [7]:
from toolz import valmap 

# For every selected method, pipe its group id through
# load_group_results -> average_group_scores -> merge_predictions,
# keeping the result under the method's name.
merged_prediction = {
    method_name: merge_predictions(
        average_group_scores(
            load_group_results(group_id)
        )
    )
    for method_name, group_id in GROUPS.items()
}


In [8]:
# Binarize the outcome at the shared prediction horizon instead of repeating the
# literal 10*365. Unlike the counting cell above, drop_censored is left at
# binarize_event's default here.
y_binarized = binarize_event(TIME_POINT_PREDICTION, y['data'])





In [9]:
# Number of rows per study among the rows present in y_binarized.
print(
    data.loc[y_binarized.index, 'STUDY']
    .value_counts()
    .to_dict()
)

{'HEALTHABC': 2188, 'ASCOT': 243, 'FLEMENGHO': 219, 'PROSPER': 191, 'PREDICTOR': 31, 'HVC': 11}


In [10]:
# Sanity check: show the method names for which merged predictions were built.
merged_prediction.keys()

dict_keys(['gb', 'coxnet', 'stacking', 'pcp_hf'])

### Per cohort

In [11]:
from hcve_lib.visualisation import b
from hcve_lib.data import binarize_event
from hcve_lib.utils import is_noneish

In [12]:
from deps.data import group_by_study

# Before/after label counts of the binarization, computed separately for each study.
for study, study_X in group_by_study(data, X):
    b(study)
    study_y = y['data'].loc[study_X.index]
    binarized = binarize_event(TIME_POINT_PREDICTION, study_y, drop_censored=False)
    before = study_y['label'].value_counts()
    print('Before')
    print(before.to_dict())
    print('↓')
    print(f'Binarization {TIME_POINT_PREDICTION/365} years (NA removed)')
    # fill_value=0 treats a label value missing on either side as a zero count,
    # so a class that vanishes after binarization is reported as -before[value]
    # rather than NaN; astype(int) keeps every printed count an integer.
    after = binarized.value_counts().sub(before, fill_value=0).astype(int).to_dict()
    print(after)
    print('↓')
    print(binarized.value_counts(dropna=True).to_dict())

Before
{0.0: 19010, 1.0: 243}
↓
Binarization 10.0 years (NA removed)
{0.0: -19010, 1.0: 0.0}
↓
{1.0: 243}






Before
{0.0: 1083, 1.0: 18}
↓
Binarization 10.0 years (NA removed)
{0.0: -882, 1.0: 0}
↓
{0.0: 201, 1.0: 18}






Before
{0.0: 2361, 1.0: 574}
↓
Binarization 10.0 years (NA removed)
{0.0: -570, 1.0: -177}
↓
{0.0: 1791, 1.0: 397}






Before
{0.0: 425, 1.0: 11}
↓
Binarization 10.0 years (NA removed)
{0.0: -425, 1.0: 0.0}
↓
{1.0: 11}






Before
{0.0: 1432, 1.0: 31}
↓
Binarization 10.0 years (NA removed)
{0.0: -1432, 1.0: 0.0}
↓
{1.0: 31}






Before
{0.0: 4975, 1.0: 191}
↓
Binarization 10.0 years (NA removed)
{0.0: -4975, 1.0: 0.0}
↓
{1.0: 191}






## PR analysis

### Defining metrics

In [13]:
def limit_age_metric(metric):
    """Wrap ``metric`` in a ``StratifiedMetric`` with a single split named ``'30_to_80'``.

    The split is the list of index labels of ``get_30_to_80(X)``, where ``X`` is
    the notebook-level feature table.

    Note: ``StratifiedMetric`` and ``get_30_to_80`` are imported in the next
    cell, so that cell has to run before this function is called.
    """
    age_subset_index = list(get_30_to_80(X).index)
    return StratifiedMetric(metric, splits={'30_to_80': age_subset_index})

In [None]:
from hcve_lib.metrics import StratifiedMetric
from deps.data import get_30_to_80

# Precision-recall curve metric at the prediction horizon, without sample weights
# (one-element tuple).
pr_metrics_unweighted = (
    BinaryMetricFromScore(
        precision_recall_curve_with_confusion,
        time=TIME_POINT_PREDICTION,
        sample_weight=None,
    ),
)

# Matching summary metric: average precision at the same horizon, unweighted.
pr_metrics_unweighted_summary = (
    BinaryMetricFromScore(
        average_precision_score,
        time=TIME_POINT_PREDICTION,
        sample_weight=None,
    ),
)



In [29]:
# Sample weights computed by map_inverse_weight from the STUDY of each row in y_binarized.
inverse_weight_cohorts = map_inverse_weight(data['STUDY'].loc[y_binarized.index])

# Precision-recall curve metric using the cohort-based sample weights.
# (An earlier list assignment to this same name was immediately overwritten by
# this tuple, so it has been removed.)
pr_metrics_weighted_cohorts = (
    BinaryMetricFromScore(precision_recall_curve_with_confusion, time=TIME_POINT_PREDICTION, sample_weight=inverse_weight_cohorts),
)

# Matching summary metric: average precision with the same weights.
pr_metrics_weighted_cohorts_summary = (
    BinaryMetricFromScore(average_precision_score, time=TIME_POINT_PREDICTION, sample_weight=inverse_weight_cohorts),
)


In [30]:
# Sample weights computed from the binarized outcome. proportions={1: 0.03, 0: 0.97}
# is presumably the target class mix (3% cases / 97% controls) — confirm in map_inverse_weight.
inverse_incident_weight = map_inverse_weight(
    y_binarized,
    proportions={1:0.03, 0: 0.97},
)

# [PR curve metric, average-precision metric], both using the incidence-based weights.
pr_metrics_incidence_weighted = [
    BinaryMetricFromScore(
        precision_recall_curve_with_confusion,
        time=TIME_POINT_PREDICTION,
        sample_weight=inverse_incident_weight,
    ),
    BinaryMetricFromScore(
        average_precision_score,
        time=TIME_POINT_PREDICTION,
        sample_weight=inverse_incident_weight,
    ),
]

In [17]:

# Product of the cohort-based and incidence-based weights; entries that come out
# as NaN are dropped.
combined_weight = (
    inverse_weight_cohorts * inverse_incident_weight
).dropna()

# [PR curve metric, average-precision metric], both using the combined weights.
pr_metrics_incidence_cohort_weighted = [
    BinaryMetricFromScore(
        precision_recall_curve_with_confusion,
        time=TIME_POINT_PREDICTION,
        sample_weight=combined_weight,
    ),
    BinaryMetricFromScore(
        average_precision_score,
        time=TIME_POINT_PREDICTION,
        sample_weight=combined_weight,
    ),
]

### Unweighted

In [None]:
# Unweighted PR analysis. The notebook imports run_pr_analysis_ci (no run_pr_analysis
# exists in scope), and it is called with these same keywords in the weighted sections.
run_pr_analysis_ci(
    GROUPS,
    y,
    metrics=pr_metrics_unweighted,
    metrics_summary=pr_metrics_unweighted_summary,
    standardize=False,
    iterations=ITERATIONS,
).show()


In [18]:
# Run the PR analysis with the unweighted curve metric for every method, then
# derive the confidence-interval data from it.
pr = get_pr_analysis(
    GROUPS,
    y,
    metrics=pr_metrics_unweighted,
    standardize=False,
    iterations=ITERATIONS,
)
pr_ci = get_pr_analysis_ci(pr)

[Memory]14.8s, 0.2min   : Loading load_group_results...
_________________________________load_group_results cache loaded - 43.6s, 0.7min
[Memory]305.6s, 5.1min  : Loading load_group_results...
_________________________________load_group_results cache loaded - 12.4s, 0.2min
[Memory]564.8s, 9.4min  : Loading load_group_results...
_________________________________load_group_results cache loaded - 45.0s, 0.7min
[Memory]819.4s, 13.7min : Loading load_group_results...
__________________________________load_group_results cache loaded - 0.1s, 0.0min



divide by zero encountered in double_scalars


invalid value encountered in double_scalars



In [19]:
# Summary (average precision) of the unweighted PR analysis. Reuses
# pr_metrics_unweighted_summary from "Defining metrics" rather than re-declaring
# the same metric inline, so the two cannot drift apart.
pr_summary = get_pr_analysis(
    GROUPS,
    y=y,
    metrics=pr_metrics_unweighted_summary,
    standardize=False,
    iterations=ITERATIONS,
    return_summary=True,
)

[Memory]1071.0s, 17.8min: Loading load_group_results...
_________________________________load_group_results cache loaded - 41.4s, 0.7min
[Memory]1118.2s, 18.6min: Loading load_group_results...
_________________________________load_group_results cache loaded - 11.3s, 0.2min
[Memory]1134.5s, 18.9min: Loading load_group_results...
_________________________________load_group_results cache loaded - 43.1s, 0.7min
[Memory]1183.3s, 19.7min: Loading load_group_results...
__________________________________load_group_results cache loaded - 0.1s, 0.0min


In [20]:
# Display the per-method summary: mean, confidence interval and standard deviation of average precision.
pr_summary

{'gb': {'average_precision_score_3650': {'mean': 0.5411641398489965,
   'ci': (0.511775463744616, 0.5659141739484992),
   'std': 0.01522466281743794}},
 'coxnet': {'average_precision_score_3650': {'mean': 0.4767401664086349,
   'ci': (0.44795480419045514, 0.5083342226095958),
   'std': 0.016382174761818795}},
 'stacking': {'average_precision_score_3650': {'mean': 0.8043644645039191,
   'ci': (0.782005272182563, 0.825385330538736),
   'std': 0.010842991136348375}},
 'pcp_hf': {'average_precision_score_3650': {'mean': 0.5503647911860816,
   'ci': (0.5247046288934044, 0.5824265386248236),
   'std': 0.014850461379101242}}}

In [None]:
from pathlib import Path

PR_FIGURE_PATH = './output/pr_final.svg'

fig = plot_pr_ci(pr_ci, pr_summary)
# write_image does not create missing directories, so make sure the target
# folder exists before exporting the figure.
Path(PR_FIGURE_PATH).parent.mkdir(parents=True, exist_ok=True)
fig.write_image(PR_FIGURE_PATH)
fig.show()

In [22]:
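# TODO: decide whether to re-enable this dtale exploration of the standardized predictions or delete the cell (currently disabled).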
# from hcve_lib.wrapped_sklearn import DFStandardScaler
# import dtale

# predictions = DFStandardScaler().fit_transform(DataFrame({
#     'gb': merged_prediction['gb']['y_score'],
#     'coxnet': merged_prediction['coxnet']['y_score'],
#     'pcp_hf': merged_prediction['pcp_hf']['y_score'],
#     'stacking': merged_prediction['stacking']['y_score'],
# }
# ))
# predictions['y'] = y_binarized
# predictions.dropna(inplace=True)
# predictions['STUDY'] = data['STUDY'].loc[predictions.index]

# dtale.show(predictions.sample(len(predictions), weights=inverse_incident_weight, replace=True), host='localhost')

### Incidence normalized

In [33]:
# Incidence-normalized PR analysis: use the metrics built with the
# incidence-based sample weights, matching this section's heading.
# pr_metrics_incidence_weighted is [curve metric, average-precision metric], so
# it is sliced into the curve part and the summary part the helper takes.
run_pr_analysis_ci(
    GROUPS,
    y,
    metrics=pr_metrics_incidence_weighted[:1],
    metrics_summary=pr_metrics_incidence_weighted[1:],
    standardize=False,
    iterations=ITERATIONS,
).show()

[Memory]13049.7s, 217.5min: Loading load_group_results...
_________________________________load_group_results cache loaded - 84.4s, 1.4min
[Memory]13686.1s, 228.1min: Loading load_group_results...
_________________________________load_group_results cache loaded - 22.1s, 0.4min
[Memory]14266.4s, 237.8min: Loading load_group_results...
_________________________________load_group_results cache loaded - 80.1s, 1.3min
[Memory]14814.2s, 246.9min: Loading load_group_results...



divide by zero encountered in double_scalars


invalid value encountered in double_scalars



__________________________________load_group_results cache loaded - 0.2s, 0.0min


TypeError: get_pr_analysis() got an unexpected keyword argument 'summary'

### Cohort normalized

In [28]:
# Cohort-normalized PR analysis: use the metrics built with the cohort-based
# sample weights, matching this section's heading and the summary cells that
# follow. The curve metric and the average-precision summary metric are passed
# separately rather than handing one combined list to both arguments.
run_pr_analysis_ci(
    GROUPS,
    y,
    metrics=pr_metrics_weighted_cohorts,
    metrics_summary=pr_metrics_weighted_cohorts_summary,
    standardize=False,
    iterations=ITERATIONS,
).show()

[Memory]12883.9s, 214.7min: Loading load_group_results...
_________________________________load_group_results cache loaded - 41.7s, 0.7min



KeyboardInterrupt



In [None]:
# Summary (average precision) of the cohort-weighted PR analysis. Reuses
# pr_metrics_weighted_cohorts_summary from "Defining metrics" rather than
# re-declaring the same metric inline.
# NOTE(review): this overwrites the unweighted `pr_summary` computed earlier.
pr_summary = get_pr_analysis(
    GROUPS,
    y=y,
    metrics=pr_metrics_weighted_cohorts_summary,
    standardize=False,
    iterations=ITERATIONS,
    return_summary=True,
)

In [None]:
# Cohort-weighted average precision, returned without summarising
# (return_summary=False) and with only 5 iterations.
# NOTE(review): this overwrites the unweighted `pr` computed earlier, and the
# hard-coded 5 differs from ITERATIONS — looks like a quick experiment; confirm.
cohort_weighted_average_precision = BinaryMetricFromScore(
    average_precision_score,
    time=TIME_POINT_PREDICTION,
    sample_weight=inverse_weight_cohorts,
)
pr = get_pr_analysis(
    GROUPS,
    y=y,
    metrics=[cohort_weighted_average_precision],
    standardize=False,
    iterations=5,
    return_summary=False,
)

In [None]:
from itertools import combinations
from scipy.stats import ttest_ind
from pandas import Series
from numpy.random import seed
from numpy.random import randint
from scipy.stats import ks_2samp

# NOTE(review): `merged_metrics_selected` is not defined anywhere in this notebook;
# it has to be created (indexable by method, then by 'c_index') before this cell can run.
scores_df = {method: merged_metrics_selected[method]['c_index'] for method in ['coxnet', 'gb', 'pcp_hf', 'stacking']}

PREVIEW_SIZE = 10


def _format_preview(scores):
    """Return up to PREVIEW_SIZE sampled scores as a comma-separated string.

    The sample is seeded with RANDOM_STATE so the printed preview is reproducible,
    and its size is capped at the number of available scores so short inputs do
    not raise.
    """
    values = Series(scores)
    preview = values.sample(min(PREVIEW_SIZE, len(values)), random_state=RANDOM_STATE)
    return ', '.join([f'{v:.2f}' for v in preview])


# Compare every pair of methods: preview the scores, then run a two-sample
# Kolmogorov-Smirnov test and an independent two-sample t-test.
for (name1, s1), (name2, s2) in combinations(scores_df.items(), 2):
    print(name1, name2)
    print(_format_preview(s1))
    print(_format_preview(s2))

    ks = ks_2samp(s1, s2)
    print(f"KS: {ks.statistic:.4f} (p-value: {ks.pvalue:.1e})")

    # Only the p-value is reported; the t statistic itself is not used.
    _, pvalue = ttest_ind(s1, s2)
    print(f"t-test: p-value: {pvalue:.1e}")

    print()
    

## ROC analysis

In [None]:
# [ROC curve metric, ROC AUC metric] at the prediction horizon, without sample weights.
roc_metrics_unweighted = [
    BinaryMetricFromScore(
        roc_curve,
        time=TIME_POINT_PREDICTION,
        sample_weight=None,
    ),
    BinaryMetricFromScore(
        roc_auc_score,
        time=TIME_POINT_PREDICTION,
        sample_weight=None,
    ),
]

In [None]:
# ROC analysis with the unweighted metrics, standardize=True.
run_roc_analysis(
    GROUPS,
    roc_metrics_unweighted,
    standardize=True,
)

In [None]:
# ROC analysis with the unweighted metrics, standardize=False.
run_roc_analysis(
    GROUPS,
    roc_metrics_unweighted,
    standardize=False,
)

In [None]:
# Sample weights computed from each row's STUDY. The notebook imports
# map_inverse_weight (no get_inverse_weight exists in scope), so that helper is used.
# NOTE(review): the PR section restricts STUDY to y_binarized.index before
# weighting; confirm whether the ROC weights should be restricted the same way.
inverse_weight = map_inverse_weight(data['STUDY'])

# [ROC curve metric, ROC AUC metric] at the prediction horizon, using the weights above.
roc_metrics_weighted = [
    BinaryMetricFromScore(roc_curve, time=TIME_POINT_PREDICTION, sample_weight=inverse_weight),
    BinaryMetricFromScore(roc_auc_score, time=TIME_POINT_PREDICTION, sample_weight=inverse_weight),
]

In [None]:
# ROC analysis with the weighted metrics, standardize=True.
run_roc_analysis(
    GROUPS,
    roc_metrics_weighted,
    standardize=True,
)

In [None]:
# ROC analysis with the weighted metrics, standardize=False.
run_roc_analysis(
    GROUPS,
    roc_metrics_weighted,
    standardize=False,
)
