-
Notifications
You must be signed in to change notification settings - Fork 2
/
metrics.py
76 lines (59 loc) · 3.83 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import pandas as pd
import warnings
def cindex(y_true_times, predicted_times, tol=1e-8):
"""
Author: Romuald Menuet & Rémy Dubois
Evaluate concordance index from Pandas DataFrame, taking ties into account.
Args:
y_true_times: pd.DataFrame
pd DataFrame with three columns: `PatientID`, `Event` and `SurvivalTime` the float-valued column of true survival times.
predicted_times: pd.DataFrame
pd DataFrame with three columns: `PatientID`, `SurvivalTime` the float-valued column of predicted survival times,
and one `Event`column, whose value does not matter. It must be appended so that target and predictions have the same format.
tol: float
small float value for numerical stability.
Returns:
Concordance index, as described here:
https://square.github.io/pysurvival/metrics/c_index.html
"""
assert isinstance(y_true_times, pd.DataFrame), 'Y true times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
assert isinstance(predicted_times, pd.DataFrame), 'Predicted times should be pd dataframe with patient `PatientID` as index, and `Event` and `SurvivalTime` as columns'
assert len(y_true_times.shape) == 2, 'Y true times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
assert len(predicted_times.shape) == 2, 'Predicted times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
assert set(y_true_times.columns) == {'Event', 'SurvivalTime'}, 'Y true times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
assert set(predicted_times.columns) == {'Event', 'SurvivalTime'}, 'Predicted times should be pd dataframe with `PatientID` as index, and `Event` and `SurvivalTime` as columns'
np.testing.assert_equal(y_true_times.shape, predicted_times.shape, err_msg="Not same amount of predicted versus true samples")
assert set(y_true_times.index) == set(predicted_times.index), 'Not same patients in prediction versus ground truth'
assert np.all(predicted_times['SurvivalTime'] > 0), 'Predicted times should all be positive'
events = y_true_times.Event
y_true_times = y_true_times.SurvivalTime
predicted_times = predicted_times.SurvivalTime
# Just ordering the right way
predicted_times = predicted_times.loc[y_true_times.index]
events = events.loc[y_true_times.index]
events = events.values.astype(int)
y_true_times = y_true_times.values.astype(float)
predicted_times = predicted_times.values.astype(float)
# events = events.values.astype(bool)
np.testing.assert_array_less(1.,
predicted_times.astype(float),
err_msg="Predicted y_true_times all below 1.\
It should be in days. Make sure that you are not predicting risk instead of time.")
return _cindex_np(y_true_times, predicted_times, events)
def _cindex_np(times, predicted_times, events, tol=1.e-8):
"""
Raw CI computation from np arrray. Should not be used as is.
"""
assert times.ndim == predicted_times.ndim == events.ndim == 1, "wrong input, should be vectors only"
assert times.shape[0] == predicted_times.shape[0] == events.shape[0], "wrong input, should be vectors of the same len"
risks = - predicted_times
risks_i = risks.reshape((-1, 1))
risks_j = risks.reshape((1, -1))
times_i = times.reshape((-1, 1))
times_j = times.reshape((1, -1))
events_i = events.reshape((-1, 1))
eligible_pairs = (times_i < times_j) * events_i
well_ordered = np.sum(eligible_pairs * (risks_i > risks_j))
ties = + np.sum(eligible_pairs * 0.5 * (risks_i == risks_j))
return (well_ordered + ties) / (eligible_pairs.sum() + tol)