In [1]:

%pprint
import sys
if (osp.join(os.pardir, 'py') not in sys.path): sys.path.insert(1, osp.join(os.pardir, 'py'))

Pretty printing has been turned OFF


In [2]:

from FRVRS import (nu, fu, DataFrame, Index, Series, math, np, osp, re, sm, read_excel, concat)
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns

In [3]:

# load data frames
data_frames_dict = nu.load_data_frames(frvrs_logs_df='frvrs_logs_df')
frvrs_logs_df = data_frames_dict['frvrs_logs_df']
print(frvrs_logs_df.shape) # (829116, 122)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl.
(829116, 125)


In [4]:

base_mask_series = (frvrs_logs_df.scene_type == 'Triage') & (frvrs_logs_df.is_scene_aborted == False)
base_mask_series &= (frvrs_logs_df.is_a_one_triage_file == True)

In [7]:

if nu.pickle_exists('triage_categories_df'): triage_categories_df = nu.load_object('triage_categories_df')
else:
    file_path = '../data/xlsx/FirstResponder_InjuryTriage_Categories.xlsx'
    triage_categories_df = read_excel(file_path)
    triage_categories_df.columns = [
        'injury_id', 'implied_priority', 'injury_severity', 'injury_required_procedure', 'acceptable_secondary_procedure', 'patient_salt', 'patient_sort', 'patient_pulse',
        'patient_breath', 'patient_hearing', 'patient_mood', 'patient_pose', 'priority_notes'
    ]
    triage_categories_df = triage_categories_df.dropna(axis='columns', how='all')
    nu.store_objects(triage_categories_df=triage_categories_df)
    nu.save_data_frames(triage_categories_df=triage_categories_df)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/triage_categories_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/triage_categories_df.csv


In [8]:

columns_list = [
    'implied_priority', 'injury_id', 'injury_severity', 'injury_required_procedure', 'patient_salt', 'patient_sort', 'patient_pulse', 'patient_breath',
    'patient_hearing', 'patient_mood', 'patient_pose'
]
triage_categories_df[columns_list].sample(10).T

Unnamed: 0,24,0,23,20,6,2,14,25,5,31
implied_priority,8.0,0.0,8.0,7.5,2.0,1.0,6.0,8.0,2.0,10.0
injury_id,L Thigh Puncture,Forehead Scrape,R Wrist Amputation,L Stomach Puncture,R Forearm Laceration,L Palm Laceration,L Calf Laceration,R Thigh Puncture,L Forearm Laceration,Face Shrapnel
injury_severity,medium,low,high,high,low,low,low,medium,low,high
injury_required_procedure,tourniquet,gauzePressure,tourniquet,woundpack,gauzePressure,gauzePressure,gauzePressure,tourniquet,gauzePressure,Airway
patient_salt,IMMEDIATE,MINIMAL,IMMEDIATE,IMMEDIATE,DELAYED,DELAYED,DELAYED,IMMEDIATE,DELAYED,DEAD
patient_sort,waver,walker,waver,waver,walker,walker,waver,waver,walker,Still
patient_pulse,fast,normal,fast,fast,normal,normal,normal,fast,normal,
patient_breath,normal,normal,fast,fast,normal,normal,normal,normal,normal,
patient_hearing,normal,normal,normal,normal,normal,normal,normal,normal,normal,
patient_mood,upset,calm,agony,agony,upset,calm,calm,upset,upset,dead



# Build a Model to Predict Triage Priority

In [9]:

# Get the supervised learning and group by columns
input_features = [
    'injury_id', 'injury_severity', 'injury_required_procedure', 'patient_salt', 'patient_sort', 'patient_pulse', 'patient_breath',
    'patient_hearing', 'patient_mood', 'patient_pose'
    ]
target_variable = 'implied_priority'


#### Create a data frame with all the feature columns

In [64]:

# Keep track of the target variable and input features columns
columns_list = [target_variable] + input_features
category_history_df = triage_categories_df[columns_list]

In [65]:

print(category_history_df.shape) # (31, 11)
print(list(category_history_df.columns))

(32, 11)
['implied_priority', 'injury_id', 'injury_severity', 'injury_required_procedure', 'patient_salt', 'patient_sort', 'patient_pulse', 'patient_breath', 'patient_hearing', 'patient_mood', 'patient_pose']


In [66]:

df = category_history_df.sample(min(12, category_history_df.shape[0])).dropna(axis='columns', how='all').T
display(df.sample(min(20, df.shape[0])).sort_index())

Unnamed: 0,13,28,7,3,23,8,9,15,20,30,31,11
implied_priority,5.5,9.5,2.5,1.0,8.0,2.5,3.0,6.0,7.5,10.0,10.0,5.0
injury_id,R Calf Laceration,L Chest Collapse,L Bicep Puncture,R Palm Laceration,R Wrist Amputation,R Bicep Puncture,L Shoulder Puncture,L Thigh Laceration,L Stomach Puncture,Face Shrapnel,Face Shrapnel,L Calf Shrapnel
injury_required_procedure,gauzePressure,decompress,tourniquet,gauzePressure,tourniquet,tourniquet,woundpack,gauzePressure,woundpack,airway,airway,tourniquet
injury_severity,low,high,medium,low,high,medium,medium,medium,high,high,high,medium
patient_breath,normal,collapsedRight,normal,normal,fast,normal,normal,normal,fast,restricted,none,normal
patient_hearing,normal,normal,normal,normal,normal,normal,normal,normal,normal,none,none,normal
patient_mood,calm,unresponsive,upset,calm,agony,upset,upset,upset,agony,unresponsive,dead,upset
patient_pose,fetal,supine,sittingGround,standing,sittingGround,sittingGround,kneeling,recovery,recovery,supine,supine,fetal
patient_pulse,normal,faint,fast,normal,fast,fast,fast,normal,fast,faint,none,fast
patient_salt,DELAYED,IMMEDIATE,IMMEDIATE,DELAYED,IMMEDIATE,IMMEDIATE,IMMEDIATE,DELAYED,IMMEDIATE,EXPECTANT,DEAD,IMMEDIATE



#### One-hot encode it

In [67]:

# One-hot encode the input features columns in the one-hot encode data frame
ascii_regex = re.compile('[^a-z0-9]+')
one_hot_encode_df = nu.one_hot_encode(category_history_df, input_features)
one_hot_encode_df = one_hot_encode_df.rename(columns={cn: ascii_regex.sub('_', cn.lower()).strip('_') for cn in one_hot_encode_df.columns})
columns_obj = one_hot_encode_df.columns
assert len(columns_obj) == len(set(columns_obj)), f"Duplicate column names: {columns_obj[columns_obj.duplicated()].tolist()}"
extra_1hot_columns = nu.load_object('extra_1hot_columns')
for cn in extra_1hot_columns:
    if cn not in one_hot_encode_df.columns: one_hot_encode_df[cn] = 0
columns_list = [cn for cn in one_hot_encode_df.columns if any(map(lambda x: cn.endswith(x), ['_null', '_nan']))]
print(one_hot_encode_df.shape) # (31, 79)
print(columns_list)
df = one_hot_encode_df.sample(min(20, one_hot_encode_df.shape[0])).dropna(axis='columns', how='all').T
display(df.sample(min(20, df.shape[0])).sort_index())

(32, 79)
['injury_id_nan', 'injury_severity_nan', 'injury_required_procedure_nan', 'patient_salt_nan', 'patient_sort_nan', 'patient_pulse_nan', 'patient_breath_nan', 'patient_hearing_nan', 'patient_mood_nan', 'patient_pose_nan']


Unnamed: 0,1,26,23,0,16,17,5,20,4,8,7,14,2,31,25,6,11,29,28,27
injury_id_face_shrapnel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
injury_id_forehead_scrape,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
injury_id_l_calf_laceration,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
injury_id_r_bicep_puncture,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
injury_id_r_shin_amputation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
injury_id_r_wrist_amputation,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
injury_severity_low,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
injury_severity_medium,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
injury_severity_nan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
patient_breath_fast,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [68]:

# Analyze the input features
one_hot_column_descriptions_df = nu.get_column_descriptions(one_hot_encode_df.sample(min(2000, one_hot_encode_df.shape[0])))
display(one_hot_column_descriptions_df.sample(min(20, one_hot_column_descriptions_df.shape[0])).sort_index())

Unnamed: 0,column_name,dtype,count_blanks,count_uniques,count_zeroes,has_dates,min_value,max_value,only_integers
0,implied_priority,float64,0,16,1,True,0.0,10.0,False
3,injury_id_ear_bleed,uint8,0,2,31,True,0.0,1.0,True
4,injury_id_face_shrapnel,uint8,0,2,30,True,0.0,1.0,True
5,injury_id_forehead_scrape,uint8,0,2,31,True,0.0,1.0,True
14,injury_id_l_stomach_puncture,uint8,0,2,30,True,0.0,1.0,True
19,injury_id_r_calf_laceration,uint8,0,2,31,True,0.0,1.0,True
20,injury_id_r_calf_shrapnel,uint8,0,2,31,True,0.0,1.0,True
21,injury_id_r_chest_collapse,uint8,0,2,31,True,0.0,1.0,True
24,injury_id_r_shin_amputation,uint8,0,2,31,True,0.0,1.0,True
29,injury_id_nan,uint8,0,1,32,True,0.0,0.0,True



#### Train a model on it

In [72]:

# Train a model on the patient history data frame
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

# Split the data into training and test sets
df = one_hot_encode_df.dropna(axis='index', how='any')
nu.store_objects(spreadsheet_1hot_columns=df.drop(target_variable, axis='columns').columns.tolist())
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(target_variable, axis='columns'),
    df[target_variable],
    test_size=0.25,
    random_state=42
)

# Convert the uint8 features to floats
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/spreadsheet_1hot_columns.pkl


In [73]:

# Create a decision tree regressor
dtr_model = DecisionTreeRegressor()
dtr_model.fit(X_train, y_train)
nu.store_objects(dtr_triage_priority_model=dtr_model)

# Predict on the test set
y_pred = dtr_model.predict(X_test)

# Calculate Mean Squared Error (MSE)
dtr_mse = np.mean((y_pred - y_test)**2)

# Calculate the feature importances
feature_importances = dtr_model.feature_importances_

# Create a data frame to store feature names and importances
feature_importances_df = DataFrame()
feature_importances_df['feature_name'] = df.drop(target_variable, axis='columns').columns
feature_importances_df['feature_importance'] = feature_importances

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/dtr_triage_priority_model.pkl


In [74]:

# Train a linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
nu.store_objects(lr_triage_priority_model=lr_model)

# Predict on the test set
y_pred = lr_model.predict(X_test)

# Calculate average squared error
lr_mse = np.mean((y_pred - y_test)**2)

# Get the coefficients of the linear regression model
feature_coefficients = lr_model.coef_[0]

# Create a data frame to store the feature names and feature coefficients
feature_coefficients_df = DataFrame()
feature_coefficients_df['feature_name'] = df.drop(target_variable, axis='columns').columns
feature_coefficients_df['feature_coefficient'] = feature_coefficients
feature_coefficients_df['absolute_coefficient'] = feature_coefficients_df.feature_coefficient.map(lambda x: abs(x))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/lr_triage_priority_model.pkl


In [75]:

# Print the accuracies
print('DTR MSE:', dtr_mse)
print('LR MSE:', lr_mse)

DTR MSE: 2.78125
LR MSE: 2.3055456011163784


In [76]:

# Display the feature importances data frame
display(feature_importances_df.sort_values('feature_importance', ascending=False).head(10))

Unnamed: 0,feature_name,feature_importance
46,patient_sort_walker,0.632356
57,patient_breath_normal,0.204689
47,patient_sort_waver,0.090953
75,patient_pose_supine,0.035868
74,patient_pose_standing,0.01874
42,patient_salt_immediate,0.004818
34,injury_required_procedure_gauzepressure,0.003542
68,patient_mood_upset,0.003188
40,patient_salt_delayed,0.002834
73,patient_pose_sittingground,0.001063


In [77]:

# Display the feature coefficients data frame
columns_list = ['feature_name', 'feature_coefficient']
display(feature_coefficients_df.sort_values('absolute_coefficient', ascending=False)[columns_list].head(10))

Unnamed: 0,feature_name,feature_coefficient
0,injury_id_asthmatic,-0.432417
49,patient_pulse_faint,-0.432417
56,patient_breath_none,-0.432417
55,patient_breath_fast,-0.432417
54,patient_breath_collapsedright,-0.432417
53,patient_pulse_nan,-0.432417
52,patient_pulse_normal,-0.432417
51,patient_pulse_none,-0.432417
50,patient_pulse_fast,-0.432417
48,patient_sort_nan,-0.432417



#### Perform a sample inference

In [78]:

# Display the one-hot encoded sample
input_encode_df = one_hot_encode_df.sample(1)
input_encode_idx = input_encode_df.index.item()
print(input_encode_idx)
print(input_encode_df.shape)
mask_series = (input_encode_df.T[input_encode_idx] == 0)
df = input_encode_df.T[~mask_series]
enc_idx_list = df.index.tolist()
display(df)

25
(1, 79)


Unnamed: 0,25
implied_priority,8.0
injury_id_l_thigh_puncture,1.0
injury_severity_medium,1.0
injury_required_procedure_tourniquet,1.0
patient_salt_immediate,1.0
patient_sort_waver,1.0
patient_pulse_fast,1.0
patient_breath_normal,1.0
patient_hearing_normal,1.0
patient_mood_upset,1.0


In [79]:

# Display the categories history sample
mask_series = (category_history_df.index == input_encode_idx)
df = category_history_df[mask_series]
print(df.shape)
cn_set = set()
for cn in category_history_df.columns:
    for enc_idx in enc_idx_list:
        if enc_idx.startswith(cn): cn_set.add(cn)
columns_list = [target_variable] + list(cn_set - set([target_variable]))
display(df[columns_list].T)

(1, 11)


Unnamed: 0,25
implied_priority,8.0
injury_required_procedure,tourniquet
injury_severity,medium
patient_sort,waver
patient_pulse,fast
patient_mood,upset
injury_id,L Thigh Puncture
patient_pose,sittingGround
patient_salt,IMMEDIATE
patient_breath,normal


In [80]:

# Convert the input features to a NumPy array
input_features_array = np.array(input_encode_df.drop(target_variable, axis='columns').values)
actual_target_value = input_encode_df[target_variable].squeeze()

In [81]:

# Predict the target value
predicted_target_value = lr_model.predict(input_features_array)[0]
print(f'LR: predicted: {predicted_target_value}, actual: {actual_target_value}')

LR: predicted: 5.125745482742786, actual: 8.0


In [82]:

# Predict the target value
predicted_target_value = dtr_model.predict(input_features_array)[0]
print(f'DTR: predicted: {predicted_target_value}, actual: {actual_target_value}')

DTR: predicted: 5.0, actual: 8.0



## Maintenance

In [165]:

# Add prediction columns
if ('lr_model_prediction' not in category_history_df.columns) or ('dtr_model_prediction' not in category_history_df.columns):
    for input_encode_idx, row_series in category_history_df.iterrows():
        input_encode_df = one_hot_encode_df.iloc[input_encode_idx].to_frame().T
        input_features_array = np.array(input_encode_df.drop(target_variable, axis='columns').values)
        category_history_df.loc[input_encode_idx, 'lr_model_prediction'] = lr_model.predict(input_features_array)[0]
        category_history_df.loc[input_encode_idx, 'dtr_model_prediction'] = dtr_model.predict(input_features_array)[0]
    nu.store_objects(category_history_df=category_history_df)
    nu.save_data_frames(category_history_df=category_history_df)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/category_history_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/category_history_df.csv


In [166]:

category_history_df.sample(8).T

Unnamed: 0,16,17,5,2,4,30,25,10
implied_priority,6.0,7.0,2.0,1.0,1.5,10.0,8.0,3.0
injury_id,R Thigh Laceration,R Forearm Laceration,L Forearm Laceration,R Palm Laceration,Asthmatic,Face Shrapnel,L Thigh Puncture,R Shoulder Puncture
injury_severity,medium,medium,low,low,medium,high,medium,medium
injury_required_procedure,gauzePressure,woundpack,gauzePressure,gauzePressure,none,gauzePressure,tourniquet,woundpack
patient_salt,DELAYED,IMMEDIATE,DELAYED,DELAYED,DELAYED,EXPECTANT,IMMEDIATE,IMMEDIATE
patient_sort,waver,waver,walker,walker,walker,still,waver,walker
patient_pulse,normal,fast,normal,normal,fast,faint,fast,fast
patient_breath,normal,fast,normal,normal,restricted,restricted,normal,normal
patient_hearing,normal,normal,normal,normal,normal,none,normal,normal
patient_mood,upset,upset,upset,calm,upset,unresponsive,upset,upset


In [63]:

# Check that all the values are in the logs already
columns_list = [
    'injury_id', 'injury_severity', 'injury_required_procedure', 'patient_salt', 'patient_sort', 'patient_pulse', 'patient_breath',
    'patient_hearing', 'patient_mood', 'patient_pose'
]
for row_index, row_series in triage_categories_df[columns_list].iterrows():
    for column_name, column_value in row_series.iteritems():
        assert column_value in frvrs_logs_df[
            column_name
        ].unique().tolist(), f'"{column_value}" is not in {column_name}: {frvrs_logs_df[column_name].unique().tolist()}'

In [61]:

df = nu.check_for_typos([column_value], frvrs_logs_df[column_name].unique())
df

Unnamed: 0,left_item,right_item,max_similarity
0,,none,0.75


In [62]:

# Replace column value with the most similar
mask_series = (triage_categories_df[column_name] == column_value)
new_column_value = df.iloc[0].right_item
triage_categories_df.loc[mask_series, column_name] = new_column_value
nu.store_objects(triage_categories_df=triage_categories_df)
nu.save_data_frames(triage_categories_df=triage_categories_df)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/triage_categories_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/triage_categories_df.csv
