In [6]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
import pickle as pkl
import sys
from os.path import join as oj

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imodels import RuleListClassifier, RuleFit, SLIM, GreedyRuleList, SkopeRules, IRFClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier

# project-local modules live in ../src; the path must be extended before importing them
sys.path.append('../src')
import viz
import data
import validate
outcome_def = 'iai_intervention'  # name of the column used as the prediction target
MODELS_DIR = '../models/simple_3_splits'  # directory that predict_and_save pickles results into
os.makedirs(MODELS_DIR, exist_ok=True)


# load the data
(df_pecarn, df_psrc, common_feats,
 filtered_feats_pecarn, filtered_feats_psrc) = data.load_it_all(dummy=True)

# Stack the two frames, keeping only the columns listed in common_feats.
# pd.concat is the supported replacement for DataFrame.append (deprecated in
# pandas 1.4, removed in 2.0); like append, it keeps the original row labels.
df = pd.concat([df_pecarn[common_feats], df_psrc[common_feats]])
processed_feats = data.select_final_feats(common_feats)

# Boolean row masks: a row belongs to a split when its cv_fold value is one of
# the fold ids that `data` lists for that split.
# NOTE(review): df stacks both cohorts, so this presumably relies on the two
# cohorts using disjoint fold ids -- confirm in data.py.
train_idxs = df['cv_fold'].isin(data.pecarn_train_idxs)
test_idxs1 = df['cv_fold'].isin(data.pecarn_test_idxs)
test_idxs2 = df['cv_fold'].isin(data.psrc_train_idxs + data.psrc_test_idxs)

# Full frame and the outcome column
X = df
y = df[outcome_def]
half = train_idxs.sum() // 2  # only needed by the (disabled) 4-split below
split_to_plot = '_test2'

# 4-split (disabled): first half of the training rows to fit, second half to validate
# X_train, y_train = X[train_idxs][:half], y[train_idxs][:half]
# X_cv, y_cv = X[train_idxs][half:], y[train_idxs][half:]

# 3-split: the cv split is the very same object as the training split
X_train = X[train_idxs]
y_train = y[train_idxs]
X_cv = X_train
y_cv = y_train

X_test1 = X[test_idxs1]
y_test1 = y[test_idxs1]
X_test2 = X[test_idxs2]
y_test2 = y[test_idxs2]

# split name -> (sum of the outcome column, number of rows)
data_sizes = {
    split: (labels.sum(), feats.shape[0])
    for split, feats, labels in (
        ('train', X_train, y_train),
        ('cv', X_cv, y_cv),
        ('test1', X_test1, y_test1),
        ('test2', X_test2, y_test2),
    )
}
def predict_and_save(model, model_name='decision_tree', split_to_plot='_cv'):
    '''Compute sensitivity/specificity curves on every split, pickle them, return the cv curve.

    Params
    ------
    model
        fitted classifier exposing ``predict_proba``; column 1 of its output
        is used as the score
    model_name: str
        basename (without extension) of the pickle written under MODELS_DIR
    split_to_plot: str
        suffix of the split whose curve is plotted; one of
        '_train', '_test1', '_test2', '_cv'

    Returns
    -------
    sens, spec, threshes
        the curve computed on the '_cv' split

    Side effects: writes ``<MODELS_DIR>/<model_name>.pkl`` containing the model
    and the curves of all four splits.
    '''
    # Insertion order is the evaluation order (train, test1, test2, cv).
    splits = {
        '_train': (X_train, y_train),
        '_test1': (X_test1, y_test1),
        '_test2': (X_test2, y_test2),
        '_cv': (X_cv, y_cv),
    }

    results = {'model': model}
    for suffix, (X_split, y_split) in splits.items():
        sens, spec, threshes = validate.sensitivity_specificity_curve(
            y_split,
            model.predict_proba(X_split)[:, 1],
            plot=(suffix == split_to_plot),
        )
        # The title is re-applied on every iteration (as in the original code);
        # it always names the split selected for plotting.
        plt.title(split_to_plot[1:])
        results['sens' + suffix] = sens
        results['spec' + suffix] = spec
        results['threshes' + suffix] = threshes

    # Context manager guarantees the file is flushed and closed, even if
    # pickling fails part-way.
    with open(oj(MODELS_DIR, model_name + '.pkl'), 'wb') as f:
        pkl.dump(results, f)

    # Return the cv curve explicitly instead of relying on '_cv' being the
    # last split visited by the loop.
    return results['sens_cv'], results['spec_cv'], results['threshes_cv']

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  result = method(y)


In [8]:
X_train.head()

Unnamed: 0,ThoracicTrauma_yes,GCSScore_Full_yes,GCSScore_Full_no,AbdTrauma_or_SeatBeltSign_yes,AbdTrauma_yes,MOI_Object struck abdomen,VomitWretch_unknown,MOI_Pedestrian/bicyclist struck by moving vehicle,Hypotension_yes,AbdTenderDegree_Mild,...,AbdDistention_no,InitHeartRate,MOI_Fall from an elevation,DecrBreathSound_yes,LtCostalTender,MOI_Motorcycle/ATV/Scooter collision,VomitWretch_yes,iai_intervention,cv_fold,dset
1,1,1,0,0,0,0,0,0,0,0,...,1,102.0,1,0,1.0,0,0,0,4,pecarn
3,0,1,0,0,0,1,0,0,0,0,...,1,74.0,0,0,2.0,0,0,0,1,pecarn
4,0,1,0,0,0,0,0,1,0,0,...,1,103.0,0,0,2.0,0,1,0,2,pecarn
5,1,1,0,1,1,1,0,0,0,0,...,1,124.0,0,0,2.0,0,0,0,4,pecarn
7,1,1,0,0,0,0,0,1,0,0,...,1,122.0,0,0,1.0,0,0,0,1,pecarn


**Test individual rules**

In [51]:
def pred_rule(X):
    """Apply a hand-written OR-rule to every row of X.

    A row is flagged (1) when at least one active criterion holds,
    otherwise it gets 0. Returns an integer Series indexed like X.
    """
    criteria = [
        X['AbdTenderDegree_None'].eq(0),
        X['GCSScore_Full_yes'].eq(0),
        X['AbdTrauma_or_SeatBeltSign_yes'].eq(1),
        # candidate criteria, currently disabled:
        # X['ThoracicTrauma_yes'].eq(1),
        # X['CostalTender_yes'].eq(1),
        # X['DecrBreathSound_yes'].eq(1),
        # X['Hypotension_yes'].eq(1),
        # X['AbdDistention_or_AbdomenPain_yes'].eq(1),
        # X['MOI_Motor vehicle collision'].eq(1) & X['AbdTrauma_or_SeatBeltSign_yes'].eq(1),
        # X['VomitWretch_yes'].eq(1),
    ]
    flagged = criteria[0]
    for criterion in criteria[1:]:
        flagged = flagged | criterion
    return flagged.astype(int)
# Predictions of the current rule on each split
y_train_pred = pred_rule(X_train)
y_test1_pred = pred_rule(X_test1)
y_test2_pred = pred_rule(X_test2)

# One line per split: sensitivity first, then specificity (both in percent)
for _split, _y_true, _y_pred in (
    ('train', y_train, y_train_pred),
    ('test1', y_test1, y_test1_pred),
    ('test2', y_test2, y_test2_pred),
):
    print(f'{_split} sens: {100*validate.sensitivity_score(_y_true, _y_pred):.2f}, {100*validate.specificity_score(_y_true, _y_pred):0.2f}')

train sens: 95.49, 45.76
test1 sens: 92.86, 43.97
test2 sens: 98.39, 48.26


In [None]:
def pred_rule(X):
    """Apply a hand-written OR-rule to every row of X.

    A row is flagged (1) when at least one active criterion holds,
    otherwise it gets 0. Returns an integer Series indexed like X.
    This variant tests the raw 'GCSScore' column (any value in 0..14)
    in place of the 'GCSScore_Full_yes' indicator.
    """
    criteria = [
        X['AbdTrauma_or_SeatBeltSign_yes'].eq(1),
        # X['GCSScore_Full_yes'].eq(0),
        X['GCSScore'].isin(range(15)),
        X['AbdTenderDegree_None'].eq(0),
        # candidate criteria, currently disabled:
        # X['ThoracicTrauma_yes'].eq(1),
        # X['AbdomenPain_yes'].eq(1),
        # X['DecrBreathSound_yes'].eq(1),
        # X['VomitWretch_yes'].eq(1),
        # X['CostalTender_yes'].eq(1),
    ]
    flagged = criteria[0]
    for criterion in criteria[1:]:
        flagged = flagged | criterion
    return flagged.astype(int)
# Predictions of the current rule on each split
y_train_pred = pred_rule(X_train)
y_test1_pred = pred_rule(X_test1)
y_test2_pred = pred_rule(X_test2)

# One line per split: sensitivity first, then specificity (both in percent)
for _split, _y_true, _y_pred in (
    ('train', y_train, y_train_pred),
    ('test1', y_test1, y_test1_pred),
    ('test2', y_test2, y_test2_pred),
):
    print(f'{_split} sens: {100*validate.sensitivity_score(_y_true, _y_pred):.2f}, {100*validate.specificity_score(_y_true, _y_pred):0.2f}')