In [1]:
from train import create_df, build_parser, ARDSDetectionModel

# Start from the parser defaults (empty argv) and override only the options
# this analysis needs.
args = build_parser().parse_args([])
overrides = {
    'frame_size': 100,
    'n_new_features': 8,
    'feature_selection_method': 'chi2',
    # NOTE(review): presumably create_df loads this pickle from the working
    # directory; only point it at trusted files -- confirm.
    'from_pickle': '50-50-fs100-remove-pt-0008.pkl',
}
for option, value in overrides.items():
    setattr(args, option, value)

# Build the dataset from the configured args, then the model on top of it.
df = create_df(args)
model = ARDSDetectionModel(args, df)

In [2]:
# Train and evaluate the model. The output below prints, per fold, the selected
# features, the results at the Youden threshold, the model results and the
# misclassified patients.
# NOTE(review): later cells read model.models and model.results, which are
# presumably populated by this call -- confirm.
model.train_and_test()

----Run fold 1----
Selected features: ['median_mean_flow_from_pef', 'median_inst_RR', 'median_pef_+0.16_to_zero', 'median_iTime', 'median_eTime', 'median_I:E ratio', 'median_dyn_compliance', 'median_tve:tvi ratio']
Results via Youdens threshold
+-------+---------+-----+------+------+-----+
| patho | % votes | sen | spec | prec |  f1 |
+-------+---------+-----+------+------+-----+
| OTHER |    75   | 0.9 | 0.9  | 0.9  | 0.9 |
|  ARDS |    75   | 0.9 | 0.9  | 0.9  | 0.9 |
+-------+---------+-----+------+------+-----+
Model Results
+-------+----------+--------+-------------+-----------+------+--------+
| patho | accuracy | recall | specificity | precision | auc  |   f1   |
+-------+----------+--------+-------------+-----------+------+--------+
| OTHER |   0.85   |  0.7   |     1.0     |    1.0    | 0.99 | 0.8235 |
|  ARDS |   0.85   |  1.0   |     0.7     |   0.7692  | 0.99 | 0.8696 |
+-------+----------+--------+-------------+-----------+------+--------+
Misclassified Patients
+---------

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  results[col] = results[col].astype(int)


Results via Youdens threshold
+-------+---------+-----+--------+--------+--------+
| patho | % votes | sen |  spec  |  prec  |   f1   |
+-------+---------+-----+--------+--------+--------+
| OTHER |    50   | 0.8 |  1.0   |  1.0   | 0.8889 |
|  ARDS |    50   | 1.0 | 0.8333 | 0.8333 | 0.9091 |
+-------+---------+-----+--------+--------+--------+
Model Results
+-------+----------+--------+-------------+-----------+------+--------+
| patho | accuracy | recall | specificity | precision | auc  |   f1   |
+-------+----------+--------+-------------+-----------+------+--------+
| OTHER |   0.9    |  0.8   |     1.0     |    1.0    | 0.96 | 0.8889 |
|  ARDS |   0.9    |  1.0   |     0.8     |   0.8333  | 0.96 | 0.9091 |
+-------+----------+--------+-------------+-----------+------+--------+
Misclassified Patients
+-------------------+--------+------------+-------------+------------+
|      patient      | actual | prediction | OTHER Votes | ARDS Votes |
+-------------------+--------+-----------

In [3]:
# NOTE(review): imports normally live in the first cell of a notebook;
# consider moving this one up.
import shap
# Initialise SHAP's JavaScript support so force plots can render in the notebook.
shap.initjs()
# Tree explainer for the model stored at index 0 of model.models
# (presumably the first fold's model -- confirm).
explainer = shap.TreeExplainer(model.models[0])

In [4]:
# Materialise the data splits as a list so individual folds can be indexed
# (splits[0], splits[model_idx]) and reused by later cells.
splits = list(model.perform_data_splits())

In [5]:
# Unpack the split at index 0, keeping the pre-selection frames under their
# own names so the selected ones are not rebound in place.
raw_x_train, raw_x_test, y_train, y_test = splits[0]

# Reduce both frames to the selected feature columns (the call prints the
# chosen feature names).
x_train, x_test = model.perform_feature_selection(raw_x_train, y_train, raw_x_test)

Selected features: ['median_mean_flow_from_pef', 'median_inst_RR', 'median_pef_+0.16_to_zero', 'median_iTime', 'median_eTime', 'median_I:E ratio', 'median_dyn_compliance', 'median_tve:tvi ratio']


In [6]:
# SHAP values for every training row of the split unpacked above, explained
# with the model at index 0.
explainer = shap.TreeExplainer(model.models[0])
vals = explainer.shap_values(x_train)

# Force plot for the first training row, using output index 0
# (presumably the OTHER class -- confirm against the label encoding).
base_value_0 = explainer.expected_value[0]
row_0_shap = vals[0][0, :]
row_0_features = x_train.iloc[0, :]
shap.force_plot(base_value_0, row_0_shap, row_0_features)

In [7]:
# Sanity check: the model's own prediction for the first training row, to
# compare against the force plot for that row.
first_row = x_train.iloc[0, :]
cell = first_row.tolist()
fold_model = model.models[0]
fold_model.predict([cell])

array([0])

In [8]:
# Stacked force plot over the first n_rows training rows, for output index 0.
n_rows = 100
shap.force_plot(explainer.expected_value[0], vals[0][:n_rows, :], x_train.iloc[:n_rows, :])

In [9]:
# Select every row of model.data that belongs to patient 0112RPI1620160105.
is_pt_112 = model.data.patient == '0112RPI1620160105'
pt_112 = model.data[is_pt_112]

In [10]:
# Compute SHAP values for patient 112's rows with the model at index 0.
explainer = shap.TreeExplainer(model.models[0])
# NOTE(review): this relies on x_test still being the split-0 test frame from
# the earlier feature-selection cell, and on this patient's rows being part of
# it; missing index labels are not handled here -- confirm.
pt_112_x = x_test.loc[pt_112.index]
pt_112_vals = explainer.shap_values(pt_112_x)

In [11]:
# Force plot over all of patient 112's rows, using output index 1
# (presumably the ARDS class -- confirm against the label encoding).
shap.force_plot(explainer.expected_value[1], pt_112_vals[1], pt_112_x)

In [12]:
# NOTE(review): unexplained magic numbers -- document what the 3 and the 308
# count (the ratio evaluates to roughly 0.97%).
3/308.0

0.00974025974025974

In [13]:
# Misclassified patients (actual pathology != prediction), bucketed by the
# pred_frac of the wrong call.
misclassified = model.results.patho != model.results.prediction
id_cols = ['patient', 'model_idx']

# "Near total" confidence: pred_frac >= .99
patients_with_near_total_confidence = model.results.loc[misclassified & (model.results.pred_frac >= .99), id_cols]
# "Very high" confidence: pred_frac >= .8 (a superset of the near-total group)
patients_with_very_high_confidence = model.results.loc[misclassified & (model.results.pred_frac >= .8), id_cols]



In [14]:
# Patient IDs that were misclassified with pred_frac >= .99.
patients_with_near_total_confidence.patient

44    0343RPI3920161016
57    0354RPI5820161029
88    0624RPI0320180708
97    0625RPI2820180628
Name: patient, dtype: object

In [15]:
# Patients misclassified with pred_frac >= .8, alongside the model_idx value
# that later cells use to pick the matching model and data split.
patients_with_very_high_confidence

Unnamed: 0,patient,model_idx
43,0304RPI1620160829,2
44,0343RPI3920161016,2
57,0354RPI5820161029,2
88,0624RPI0320180708,4
93,0545RPI0520171214,4
97,0625RPI2820180628,4


In [19]:
# Explain one confidently misclassified patient with SHAP.
# idx is the row position in patients_with_near_total_confidence; it is named
# here (rather than hard-coding .iloc[0] twice) so this cell matches the
# sibling cells that explain rows 1, 2 and 3.
idx = 0
patient_id = patients_with_near_total_confidence.patient.iloc[idx]
pt = model.data[model.data.patient == patient_id]
print(pt.patient.unique()[0])

# Use the model and data split recorded for this patient's prediction.
model_idx = patients_with_near_total_confidence.model_idx.iloc[idx]
explainer = shap.TreeExplainer(model.models[model_idx])
x_train, x_test, y_train, y_test = splits[model_idx]
x_train, x_test = model.perform_feature_selection(x_train, y_train, x_test)

# SHAP values are computed on the feature-selected test rows; the values shown
# in the plot are read from model.data for the same rows and columns.
pt_x = x_test.loc[pt.index]
pt_vals = explainer.shap_values(pt_x)
# Single-step .loc selects rows and columns together instead of chaining two lookups.
features = model.data.loc[pt_x.index, x_test.columns]
# Output index 1 is plotted (presumably the ARDS class -- confirm).
shap.force_plot(explainer.expected_value[1], pt_vals[1], features)

0343RPI3920161016
Selected features: ['median_mean_flow_from_pef', 'median_inst_RR', 'median_slope_minF_to_zero', 'median_pef_+0.16_to_zero', 'median_iTime', 'median_eTime', 'median_I:E ratio', 'median_dyn_compliance']


In [18]:
# Explain the confidently misclassified patient at row position idx of
# patients_with_near_total_confidence.
idx = 1
patient_id = patients_with_near_total_confidence.patient.iloc[idx]
pt = model.data[model.data.patient == patient_id]
print(pt.patient.unique()[0])

# Use the model and data split recorded for this patient's prediction.
model_idx = patients_with_near_total_confidence.model_idx.iloc[idx]
explainer = shap.TreeExplainer(model.models[model_idx])
x_train, x_test, y_train, y_test = splits[model_idx]
x_train, x_test = model.perform_feature_selection(x_train, y_train, x_test)

# SHAP values come from the feature-selected test rows; the displayed values
# are read from model.data for the same rows and columns.
pt_x = x_test.loc[pt.index]
pt_vals = explainer.shap_values(pt_x)
features = model.data.loc[pt_x.index, x_test.columns]
shap.force_plot(explainer.expected_value[1], pt_vals[1], features)

0354RPI5820161029
Selected features: ['median_mean_flow_from_pef', 'median_inst_RR', 'median_slope_minF_to_zero', 'median_pef_+0.16_to_zero', 'median_iTime', 'median_eTime', 'median_I:E ratio', 'median_dyn_compliance']


In [20]:
# Explain the confidently misclassified patient at row position idx of
# patients_with_near_total_confidence.
idx = 2
patient_id = patients_with_near_total_confidence.patient.iloc[idx]
pt = model.data[model.data.patient == patient_id]
print(pt.patient.unique()[0])

# Use the model and data split recorded for this patient's prediction.
model_idx = patients_with_near_total_confidence.model_idx.iloc[idx]
explainer = shap.TreeExplainer(model.models[model_idx])
x_train, x_test, y_train, y_test = splits[model_idx]
x_train, x_test = model.perform_feature_selection(x_train, y_train, x_test)

# SHAP values come from the feature-selected test rows; the displayed values
# are read from model.data for the same rows and columns.
pt_x = x_test.loc[pt.index]
pt_vals = explainer.shap_values(pt_x)
features = model.data.loc[pt_x.index, x_test.columns]
shap.force_plot(explainer.expected_value[1], pt_vals[1], features)

0624RPI0320180708
Selected features: ['median_mean_flow_from_pef', 'median_inst_RR', 'median_pef_+0.16_to_zero', 'median_iTime', 'median_eTime', 'median_I:E ratio', 'median_dyn_compliance', 'median_tve:tvi ratio']


In [21]:
# Explain the confidently misclassified patient at row position idx of
# patients_with_near_total_confidence.
idx = 3
patient_id = patients_with_near_total_confidence.patient.iloc[idx]
pt = model.data[model.data.patient == patient_id]
print(pt.patient.unique()[0])

# Use the model and data split recorded for this patient's prediction.
model_idx = patients_with_near_total_confidence.model_idx.iloc[idx]
explainer = shap.TreeExplainer(model.models[model_idx])
x_train, x_test, y_train, y_test = splits[model_idx]
x_train, x_test = model.perform_feature_selection(x_train, y_train, x_test)

# SHAP values come from the feature-selected test rows; the displayed values
# are read from model.data for the same rows and columns.
pt_x = x_test.loc[pt.index]
pt_vals = explainer.shap_values(pt_x)
features = model.data.loc[pt_x.index, x_test.columns]
shap.force_plot(explainer.expected_value[1], pt_vals[1], features)

0625RPI2820180628
Selected features: ['median_mean_flow_from_pef', 'median_inst_RR', 'median_pef_+0.16_to_zero', 'median_iTime', 'median_eTime', 'median_I:E ratio', 'median_dyn_compliance', 'median_tve:tvi ratio']
