In [1]:
from hcve_lib.utils import notebook_init, transpose_dict, transpose_list

notebook_init()

import mlflow
from mlflow import get_experiment_by_name

from hcve_lib.metrics import StratifiedMetric
from deps.data import get_30_to_80
from hcve_lib.tracking import load_run_results
from deps.constants import RANDOM_STATE
from hcve_lib.evaluation_functions import compute_metric_result, merge_predictions, average_group_scores
from hcve_lib.metrics import BootstrappedMetric, CIndex
from mlflow import set_tracking_uri
import pandas
from pandas import DataFrame, Series
from toolz import valmap
from numpy import mean
from typing import Dict, List
from plotly import express as px
from deps.evaluation import get_latest_chosen_run_group
from hcve_lib.functional import star_args
from toolz import itemmap
from statistics import mean
from hcve_lib.functional import dict_subset
from deps.visualisation import plot_metric_matrix
from deps.evaluation import get_metric_matrix
from hcve_lib.tracking import load_group_results
from deps.visualisation import get_numerical_matrix
from deps.visualisation import plot_matrix
from config import GROUPS_LCO_SELECTED, GROUPS_LM_SELECTED, GROUPS_10_fold, GROUPS_LCO
from config import COHORTS
from deps.common import get_data_cached
from collections import defaultdict
from hcve_lib.functional import pipe, flatten
from hcve_lib.functional import try_except
from hcve_lib.evaluation_functions import compute_metric_prediction
from hcve_lib.metrics import WeightedCIndex
from hcve_lib.evaluation_functions import map_inverse_weight
from hcve_lib.formatting import pp
from plotly.offline import init_notebook_mode, iplot

%autoreload 2

# Number of iterations handed to every BootstrappedMetric built in this notebook.
ITERATIONS = 100

# Point mlflow at the tracking server; the URI is hard-coded to a local instance on port 5000.
set_tracking_uri('http://localhost:5000')



In [2]:

# Initialise plotly's offline notebook mode (init_notebook_mode comes from plotly.offline).
init_notebook_mode(connected=True)  

In [3]:
# Load the dataset once; the helper returns a 4-tuple that later cells use as
# `data` (indexed frame with a 'STUDY' column), `metadata`, `X` and `y` (y['data']['label'] is read below).
data, metadata, X, y = get_data_cached()

[Memory]0.5s, 0.0min    : Loading get_data...
____________________________________________get_data cache loaded - 0.1s, 0.0min


In [4]:
# Sample weights derived from the 'STUDY' column (cohort membership).
inverse_weight_cohorts = map_inverse_weight(data['STUDY'])

# Sample weights derived from the outcome label, with explicit target proportions per label value.
inverse_incident_weight = map_inverse_weight(
    y['data']['label'],
    proportions={1: 0.03, 0: 0.97},
)

In [5]:
# Subset of the leave-cohort-out groups restricted to the methods compared below.
GROUPS_LCO_MORE_METHODS = dict_subset(
    ['coxnet', 'gb', 'pcp_hf', 'stacking', 'svm'],
    GROUPS_LCO,
)

## c-index per method per cohort

In [6]:
# Bootstrapped c-index for every split of each selected single-center ("LM") group.
# Stored under its own name so the raw per-split results are not overwritten by the
# column table built at the end of this cell.
metrics_lm_by_split = valmap(
    lambda group_id: compute_metric_result(
        BootstrappedMetric(
            CIndex(),
            random_state=RANDOM_STATE,
            iterations=ITERATIONS,
        ),
        y,
        average_group_scores(load_group_results(group_id)),
    ),
    GROUPS_LM_SELECTED,
)

# Average the per-split means over all splits that share a test cohort.
# Split keys are indexable and position 1 is compared against the cohort name.
metrics_lm_averaged = defaultdict(dict)

for method_name, split_metrics in metrics_lm_by_split.items():
    for test_cohort in COHORTS:
        cohort_means = [
            metric['mean']
            for split, metric in split_metrics.items()
            if split[1] == test_cohort
        ]
        metrics_lm_averaged[method_name][test_cohort] = mean(cohort_means)

# Column-oriented table (one row per method / test cohort) consumed by the
# comparison cell further down via DataFrame(metrics_seed_averaged_lm).
metrics_seed_averaged_lm = {'method_name': [], 'mean': [], 'test_cohort': [], 'splits': []}

for method_name, metrics_by_test in metrics_lm_averaged.items():
    for test_cohort, cohort_mean in metrics_by_test.items():
        metrics_seed_averaged_lm['method_name'].append(method_name)
        metrics_seed_averaged_lm['splits'].append('Single-center')
        metrics_seed_averaged_lm['test_cohort'].append(test_cohort)
        metrics_seed_averaged_lm['mean'].append(cohort_mean)

In [7]:
# Bootstrapped c-index per leave-cohort-out ("LCO") split of each selected group.
# The handler passed to try_except turns an AttributeError into None, so a method
# whose results cannot be evaluated ends up with a None entry here.
metrics_lco_by_method = valmap(
    lambda group_id: try_except(
        lambda: compute_metric_result(
            BootstrappedMetric(
                CIndex(),
                random_state=RANDOM_STATE,
                iterations=ITERATIONS,
            ),
            y,
            average_group_scores(load_group_results(group_id)),
        ),
        {AttributeError: lambda e: None},
    ),
    GROUPS_LCO_SELECTED,
)

# Report the methods that produced no result instead of failing on `None.items()` below.
skipped_lco_methods = [
    method_name
    for method_name, result in metrics_lco_by_method.items()
    if result is None
]
if skipped_lco_methods:
    print(f'No LCO metrics available for: {", ".join(map(str, skipped_lco_methods))}')

# One record per (method, test cohort); 'ci_hi' is the distance from the mean to the upper CI bound.
metrics_seed_averaged_lco_lists = [
    [
        {
            'method_name': method_name,
            'test_cohort': split_name,
            'mean': metric['mean'],
            'ci_hi': metric['ci'][1] - metric['mean'],
        }
        for split_name, metric in result.items()
    ]
    for method_name, result in metrics_lco_by_method.items()
    if result is not None
]

# Flatten the per-method lists into a single frame and tag every row as multi-center.
metrics_seed_averaged_lco = pipe(
    metrics_seed_averaged_lco_lists,
    flatten,
    list,
    DataFrame,
    lambda df: df.assign(splits='Multi-center')
)

In [8]:
# Stack the single-center table on top of the multi-center one; rows are told apart by 'splits'.
lco_lm_comparison = pandas.concat(
    [
        DataFrame(metrics_seed_averaged_lm),
        metrics_seed_averaged_lco,
    ]
)

In [9]:
from hcve_lib.visualisation import setup_plotly_style

labels = {
    "mean": "c-index",
    'test_cohort': 'Tested on'
}


def plot_lm_lco_bars(method_comparison, bar_color, output_path):
    """Grouped bar chart of the c-index per test cohort for a single method.

    Bars are grouped by test cohort and patterned by the 'splits' column
    (single-center vs. multi-center). The figure is styled, written to
    `output_path` and returned so the caller can display it.

    Args:
        method_comparison: rows of `lco_lm_comparison` for one method.
        bar_color: colour applied to every bar trace.
        output_path: file the figure image is written to.
    """
    fig = px.bar(
        method_comparison,
        x='test_cohort',
        y='mean',
        barmode='group',
        range_y=[0.5, 0.8],
        labels=labels,
        pattern_shape='splits',
    )
    fig.update_traces(marker_color=bar_color)
    setup_plotly_style(fig)
    fig.write_image(output_path)
    return fig


lco_lm_comparison_coxnet = lco_lm_comparison[lco_lm_comparison['method_name'] == 'coxnet']

fig = plot_lm_lco_bars(lco_lm_comparison_coxnet, '#4194D9', './output/coxnet_lm_lco.svg')
fig.show()

lco_lm_comparison_gb = lco_lm_comparison[lco_lm_comparison['method_name'] == 'gb']

# The former `color_continuous_scale='color'` argument was dropped: it is not a colour
# scale name and no `color` column is mapped, so it had no effect on the figure.
fig = plot_lm_lco_bars(lco_lm_comparison_gb, '#FF7F00', './output/gb_lm_lco.svg')
fig.show()

## c-index, merged per method, unweighted

In [10]:
# For every method: load its group results, average the scores, and merge the predictions.
merged_prediction = {
    method_name: merge_predictions(average_group_scores(load_group_results(group_id)))
    for method_name, group_id in GROUPS_LCO_MORE_METHODS.items()
}

# Plain (unweighted, non-bootstrapped) c-index on the merged predictions.
metrics_merged = compute_metric_result(CIndex(), y, merged_prediction)

In [11]:
# Show the c-index computed with CIndex() on the merged predictions, keyed by method.
metrics_merged

{'coxnet': 0.764451309303184,
 'gb': 0.7396042960526062,
 'pcp_hf': 0.6892358186151134,
 'stacking': 0.5466481812768551,
 'svm': 0.6290238453812033}

In [12]:
# One bar per method, showing the merged c-index.
fig = px.bar(
    DataFrame(
        {'metric': metrics_merged.values()},
        index=metrics_merged.keys(),
    ),
    range_y=[0.5, 1],
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.update_layout(xaxis_title=None)
setup_plotly_style(fig)
fig.show()

## c-index, merged per method, weighted

In [13]:
from config import *


def get_merge_boxplot_metrics(groups, random_state=RANDOM_STATE, weight = None, return_summary=False):
    """Bootstrap a weighted c-index on the merged predictions of every method.

    Args:
        groups: mapping of method name -> group id passed to `load_group_results`.
        random_state: forwarded to `BootstrappedMetric` as its second positional argument.
        weight: forwarded to `WeightedCIndex(weight=...)`.
        return_summary: forwarded to `BootstrappedMetric(return_summary=...)`.

    Returns:
        Mapping of method name -> result of `compute_metric_prediction`.
    """
    # Phase 1: merge the averaged predictions of each group.
    predictions_by_method = {
        method: merge_predictions(average_group_scores(load_group_results(group_id)))
        for method, group_id in groups.items()
    }

    # Phase 2: select the rows of 'y_score' by the labels of the module-level `data.index`.
    aligned_by_method = {}
    for method, prediction in predictions_by_method.items():
        aligned_by_method[method] = {
            **prediction,
            'y_score': prediction['y_score'].loc[data.index],
        }

    def _bootstrapped_weighted_c_index(prediction):
        # A fresh metric object is built per method, as in the per-item lambda it replaces.
        metric = BootstrappedMetric(
            WeightedCIndex(weight=weight),
            random_state,
            iterations=ITERATIONS,
            return_summary=return_summary,
        )
        return compute_metric_prediction(metric, y, prediction)

    return {
        method: _bootstrapped_weighted_c_index(prediction)
        for method, prediction in aligned_by_method.items()
    }



In [15]:
# Bootstrapped c-index per method, weighted with the cohort-based weights.
merged_metrics_cohort_weighted = get_merge_boxplot_metrics(
    GROUPS_LCO_MORE_METHODS,
    weight=inverse_weight_cohorts,
)

In [16]:
# Bootstrapped c-index per method, weighted with the label-based (incidence) weights.
merged_metrics_incidence_weighted = get_merge_boxplot_metrics(
    GROUPS_LCO_MORE_METHODS,
    weight=inverse_incident_weight,
)

### Boxplot

In [17]:
from plotly.graph_objs import *
from config import COLORS


def plot_merged_metrics(merged_metrics):
    """Violin plot (with inner box) of the c-index samples of each method.

    Every method is annotated with the mean of its samples. The annotations are
    derived from the `merged_metrics` argument itself, so the figure no longer
    depends on whatever a notebook-level variable happens to hold.

    Args:
        merged_metrics: mapping of method name -> mapping that holds the
            c-index samples under the key "c_index". Every method name must
            also be a key of `COLORS`.

    Returns:
        The plotly figure.
    """
    # Build one frame per method and concatenate once instead of growing a frame in a loop.
    frames = []
    for method_name, method_metric_values in merged_metrics.items():
        metric_values = method_metric_values["c_index"]
        frames.append(DataFrame({
            'method': [METHODS_TITLE.get(method_name, method_name)] * len(metric_values),
            'method_original': [method_name] * len(metric_values),
            'values': metric_values,
        }))

    if frames:
        metrics_df = pandas.concat(frames, ignore_index=True)
    else:
        # Keep the expected columns so plotting an empty selection does not fail on a missing column.
        metrics_df = DataFrame(columns=['method', 'method_original', 'values'])

    fig = px.violin(
        metrics_df,
        x="method_original",
        y="values",
        color="method_original",
        range_y=[0.65, 0.8],
        labels={'method': 'Test', 'values': 'c-index', **METHODS_TITLE},
        box=True,
        color_discrete_map=COLORS,
    )
    setup_plotly_style(fig)
    fig.update_traces(marker_size=2, marker_opacity=0.9)
    fig.update_layout(legend=dict(itemsizing='constant'))

    # NOTE(review): `xaxis_ticktext` is set without `xaxis_tickvals` and receives one entry per
    # sample rather than per method; it may have no visible effect — confirm intended tick labels.
    fig.update_layout(
        font=dict(family=FONT, size=28),
        bargroupgap=0.1,
        template='simple_white',
        xaxis_ticktext=metrics_df["method"],
        xaxis_title = None,
    )

    # Mean c-index label next to each violin; x is the category position shifted to the right.
    annotations = []
    for position, (method_name, metrics) in enumerate(merged_metrics.items()):
        mean_c_index = mean(metrics['c_index'])
        annotations.append(
            dict(
                x=position + 0.4,
                y=mean_c_index,
                text=f'{mean_c_index:.3f}',
                showarrow=False,
                font=dict(
                    size=25,
                    color=COLORS[method_name],
                )
            )
        )

    fig.update(
        layout_showlegend=False,
        layout_annotations=annotations,
    )

    return fig




In [18]:
# Cohort-weighted results restricted to the four methods shown in the figure.
merged_metrics_selected = dict_subset(
    ('coxnet', 'gb', 'pcp_hf', 'stacking'),
    merged_metrics_cohort_weighted,
)

fig = plot_merged_metrics(merged_metrics_selected)
fig.show()

In [19]:
# Incidence-weighted results restricted to the four methods shown in the figure.
# The name is rebound here, so the cells below operate on the incidence-weighted values.
merged_metrics_selected = dict_subset(
    ('coxnet', 'gb', 'pcp_hf', 'stacking'),
    merged_metrics_incidence_weighted,
)

fig = plot_merged_metrics(merged_metrics_selected)
fig.show()

In [20]:
from itertools import combinations
from scipy.stats import ttest_ind
from pandas import Series
from numpy.random import seed
from numpy.random import randint
from scipy.stats import ks_2samp

# Bootstrapped c-index samples per method (a plain dict despite the historical `_df` suffix).
scores_df = {method: merged_metrics_selected[method]['c_index'] for method in ['coxnet', 'gb', 'pcp_hf', 'stacking']}

# Number of sample values printed per method as a quick sanity preview.
PREVIEW_SIZE = 10

# Pairwise comparison of the bootstrap distributions of every two methods.
# NOTE(review): both tests treat the bootstrap replicates as independent observations, so the
# p-values shrink as ITERATIONS grows — confirm this is the intended interpretation.
for (name1, s1), (name2, s2) in combinations(scores_df.items(), 2):
    print(name1, name2)
    for samples in (s1, s2):
        preview = Series(samples)
        # Seeded and capped so the preview is reproducible and works with fewer than PREVIEW_SIZE samples.
        preview = preview.sample(min(PREVIEW_SIZE, len(preview)), random_state=RANDOM_STATE)
        print(', '.join([f'{v:.2f}' for v in preview]))

    ks = ks_2samp(s1, s2)
    print(f"KS: {ks.statistic:.4f} (p-value: {ks.pvalue:.1e})")

    pvalue = ttest_ind(s1, s2).pvalue
    print(f"t-test: p-value: {pvalue:.1e}")

    print()
    

coxnet gb
0.77, 0.76, 0.77, 0.76, 0.77, 0.76, 0.76, 0.77, 0.76, 0.77
0.74, 0.74, 0.76, 0.74, 0.75, 0.75, 0.74, 0.73, 0.74, 0.72
KS: 0.9000 (p-value: 5.0e-43)
t-test: p-value: 4.5e-54

coxnet pcp_hf
0.74, 0.76, 0.76, 0.75, 0.76, 0.75, 0.77, 0.77, 0.77, 0.76
0.68, 0.69, 0.67, 0.69, 0.68, 0.68, 0.69, 0.68, 0.70, 0.69
KS: 1.0000 (p-value: 2.2e-59)
t-test: p-value: 6.6e-133

coxnet stacking
0.76, 0.77, 0.76, 0.76, 0.77, 0.77, 0.76, 0.76, 0.77, 0.77
0.53, 0.55, 0.54, 0.54, 0.55, 0.53, 0.55, 0.55, 0.53, 0.56
KS: 1.0000 (p-value: 2.2e-59)
t-test: p-value: 5.4e-210

gb pcp_hf
0.75, 0.74, 0.75, 0.75, 0.75, 0.75, 0.74, 0.75, 0.73, 0.74
0.69, 0.70, 0.70, 0.68, 0.70, 0.68, 0.68, 0.69, 0.69, 0.70
KS: 1.0000 (p-value: 2.2e-59)
t-test: p-value: 1.4e-94

gb stacking
0.74, 0.76, 0.72, 0.75, 0.74, 0.71, 0.75, 0.75, 0.73, 0.74
0.55, 0.55, 0.54, 0.54, 0.56, 0.53, 0.54, 0.55, 0.54, 0.54
KS: 1.0000 (p-value: 2.2e-59)
t-test: p-value: 8.0e-195

pcp_hf stacking
0.70, 0.70, 0.70, 0.68, 0.69, 0.69, 0.69, 0.71, 0

### Summary

In [None]:
# NOTE(review): `merged_metrics_summary` is not assigned anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel. It presumably should be produced by
# get_merge_boxplot_metrics(..., return_summary=True) — TODO confirm which groups/weights were intended.
print(merged_metrics_summary)

### Points

In [None]:
# Long-format table with one row per bootstrapped c-index sample, built in a single pass
# instead of concatenating a growing frame inside a loop.
metrics_df = DataFrame(
    [
        {'method': method_name, 'c-index': metric_value}
        for method_name, method_metric_values in merged_metrics_selected.items()
        for metric_value in method_metric_values["c_index"]
    ],
    columns=['method', 'c-index'],
)

fig = px.strip(metrics_df, x="method", y="c-index", stripmode='overlay', range_y=[0.5, 1])

# Marker size 5 is what the original per-trace loop ended up with (it overrode the earlier size of 2).
fig.update_traces(marker_size=5, marker_opacity=0.9)

fig.update_layout(legend=dict(itemsizing='constant'), xaxis_title=None)
setup_plotly_style(fig)
fig.show()