In [2]:
import os
from os.path import join as oj
import sys, time
sys.path.insert(1, oj(sys.path[0], '..'))  # insert parent path
import seaborn as sns
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from copy import deepcopy
import pickle as pkl
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import cross_validate, ShuffleSplit, train_test_split
import pandas as pd
import data_pecarn 
import data_psrc
import data
import util
from style import *
from collections import Counter
from data import feats_numerical, feats_categorical, meta, outcome_def

# plt.style.use('dark_background')
# sns.set(style="white")
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Benchmark: bivariable associations between each feature and the outcome

In [10]:
# Load both cohorts plus the feature lists returned by data.load_it_all
# (called with dummy=True, impute=False).
df_pecarn, df_psrc, common_feats, filtered_feats_pecarn, filtered_feats_psrc = data.load_it_all(dummy=True, impute=False)
# Work on an explicit copy of the shared-feature columns: later cells add derived
# columns to `d`, and assigning into a column subset of df_pecarn would otherwise
# raise pandas' SettingWithCopyWarning and leave it unclear which frame is modified.
d = df_pecarn[common_feats].copy()

computing pecarn preprocessing...
computing psrc preprocessing...


In [None]:
# Derive the two thresholded indicators used in the bivariable table.
# A plain comparison turns a missing Age/GCSScore into False, which would silently
# count unknown patients as "not below the threshold" and report zero missing values.
# Keep missingness instead: the result is 1.0 / 0.0 / NaN, so downstream `== 1`
# checks and `.isna()` counts both behave as intended.
# `assign` builds a new frame, so re-running this cell is idempotent and does not
# write into a slice of df_pecarn.
d = d.assign(**{
    'Age<2': d['Age'].lt(2).astype(float).where(d['Age'].notna()),
    'GCS<14': d['GCSScore'].lt(14).astype(float).where(d['GCSScore'].notna()),
})
data.select_final_feats(filtered_feats_pecarn)

In [None]:
# Features (columns of `d`) to tabulate against the outcome.
ks = [
    'AbdomenPain',
    'AbdTrauma_or_SeatBeltSign',
    'GCS<14',
    'AbdTenderDegree',
    'ThoracicTrauma_yes',
    'DecrBreathSound_yes',
    'VomitWretch',
    'Age<2',
    'MOI_Motor vehicle collision',
    'Hypotension_yes',
    'ThoracicTender_yes',
    'CostalTender',
    'AbdDistention_yes',
    'DistractingPain_yes',
]
# Row labels of the per-feature summary (become column headers after the transpose).
index = ['NA', 'IAIw/I', 'No IAIw/I', 'Difference']
o = data.outcome_def
io = d[o] == 1  # rows with a positive outcome


def _bivariable_row(k):
    """Return the four summary entries for feature `k`, in the order given by `index`.

    Entries: number of missing values; "positives with feature == 1 / positives with
    the feature observed"; the same ratio for rows whose outcome is not 1; and the
    difference of the two proportions formatted to three decimals.
    """
    feature_on = d[k] == 1
    missing = d[k].isna()
    observed = ~missing

    pos_with = np.sum(d.loc[feature_on, o] == 1)
    pos_total = (io & observed).sum()
    neg_with = np.sum(d.loc[feature_on, o] == 0)
    neg_total = (~io & observed).sum()

    return [
        np.sum(missing),
        f'{pos_with}/{pos_total}',
        f'{neg_with}/{neg_total}',
        f'{pos_with/pos_total - neg_with/neg_total:0.3f}',
    ]


# One column per feature, then label the rows and flip so each feature is a row.
r = pd.DataFrame.from_dict({k: _bivariable_row(k) for k in ks})
r.index = index
r.transpose()

# Recreate and evaluate the PECARN rule

In [60]:
def pecarn_rule_predict(d, o=data.outcome_def):
    """Apply the sequential rule to `d` and summarise how well it finds positives.

    `rules` is an ordered list of ``(column, flagged_values)`` pairs. For each rule,
    rows whose value in `column` is one of `flagged_values` are removed, and the next
    rule only sees the rows that are left. Rows that survive every rule are treated
    as low risk, i.e. predicted negative. Missing values never match ``isin``, so a
    row with a missing value is not flagged by that rule.

    One progress line is printed per rule:
    ``<column> <positives left> / <rows left>\\t<positives removed> / <rows removed>``.

    Parameters
    ----------
    d : pandas.DataFrame
        Must contain the outcome column `o` and every column named in `rules`.
        The caller's frame is not modified; only filtered selections are created.
    o : str
        Name of the outcome column. Its sum is used as the number of positives and
        ``== 1`` marks a missed positive, so it is presumably 0/1 coded -- TODO confirm.

    Returns
    -------
    low_risk_patients : pandas.DataFrame
        Rows not flagged by any rule.
    patients_missed : pandas.DataFrame
        Low-risk rows whose outcome equals 1 (false negatives).
    stats : dict
        ``'sensitivity'`` and ``'specificity'`` in percent. A metric is NaN when its
        denominator is zero (no positives, or no negatives, in `d`).
    """
    n = d.shape[0]
    npos = d[o].sum()
    print(f'{"Initial":<25} {npos} / {n}')
    rules = [
        ('AbdTrauma_or_SeatBeltSign', ['yes']),
        ('GCSScore', range(14)),  # any score below 14
        ('AbdTenderDegree', ['Mild', 'Moderate', 'Severe']),
        ('ThoracicTrauma', ['yes']),
        ('AbdomenPain', ['yes']),
        ('DecrBreathSound', ['yes']),
        ('VomitWretch', ['yes']),
    ]
    for k, vals in rules:
        idxs = d[k].isin(vals)
        do = d[idxs]   # rows flagged (removed) by this rule
        d = d[~idxs]   # rows still considered low risk
        print(f'{k:<25} {d[o].sum()} / {d.shape[0]}\t{do[o].sum()} / {do.shape[0]}')

    low_risk_patients = d
    patients_missed = low_risk_patients[low_risk_patients[o] == 1]

    # Confusion-matrix counts, treating "flagged by any rule" as a positive prediction.
    fn = patients_missed.shape[0]
    tp = npos - fn
    tn = low_risk_patients.shape[0] - fn
    fp = (n - low_risk_patients.shape[0]) - tp

    def _percent(num, den):
        # Undefined ratios become NaN instead of raising or emitting numpy warnings.
        return num / den * 100 if den else float('nan')

    stats = {
        'sensitivity': _percent(tp, npos),
        'specificity': _percent(tn, tn + fp),
    }
    return low_risk_patients, patients_missed, stats

# Run the rule on the PECARN frame, print both metrics (one per line, two decimals),
# and display a few descriptive columns for the positives the rule missed.
low_risk_patients, missed_patients, stats = pecarn_rule_predict(df_pecarn)
print(*(f'{name} {stats[name]:0.2f}' for name in ('sensitivity', 'specificity')), sep='\n')
missed_patients.loc[:, ['Age', 'Sex', 'MOI']]

Initial                   203 / 12044
AbdTrauma_or_SeatBeltSign 91 / 10081	112 / 1963
GCSScore                  55 / 9279	36 / 802
AbdTenderDegree           19 / 6767	36 / 2512
ThoracicTrauma            13 / 5807	6 / 960
AbdomenPain               11 / 5478	2 / 329
DecrBreathSound           9 / 5443	2 / 35
VomitWretch               7 / 5045	2 / 398
sensitivity 96.55
specificity 42.55


Unnamed: 0,Age,Sex,MOI
84,4,F,Motor vehicle collision
203,2,M,Fall down stairs
4349,2,M,Pedestrian/bicyclist struck by moving vehicle
5977,16,M,Motorcycle/ATV/Scooter collision
9419,17,F,Motor vehicle collision
9436,17,M,Motor vehicle collision
9971,17,M,Motor vehicle collision


In [61]:
# Same evaluation on the PSRC frame; note this rebinds low_risk_patients,
# missed_patients and stats from the previous cell.
low_risk_patients, missed_patients, stats = pecarn_rule_predict(df_psrc)
print(*(f'{name} {stats[name]:0.2f}' for name in ('sensitivity', 'specificity')), sep='\n')
missed_patients.loc[:, ['Age', 'MOI']]

Initial                   62 / 2188
AbdTrauma_or_SeatBeltSign 18 / 1708	44 / 480
GCSScore                  8 / 1441	10 / 267
AbdTenderDegree           2 / 1216	6 / 225
ThoracicTrauma            2 / 1123	0 / 93
AbdomenPain               2 / 993	0 / 130
DecrBreathSound           2 / 993	0 / 0
VomitWretch               2 / 937	0 / 56
sensitivity 96.77
specificity 43.98


Unnamed: 0,Age,MOI
1164,1.0,Motor vehicle collision
1755,6.0,Fall from an elevation
