In [1]:

%pprint
import sys
if (osp.join('..', 'py') not in sys.path): sys.path.insert(1, osp.join('..', 'py'))

Pretty printing has been turned OFF


In [2]:

from FRVRS import (nu, fu, DataFrame, Index, Series, math, np, osp, re, sm, concat, display)
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns

In [3]:

# Load the stored logs and category history data frames by name
data_frames_dict = nu.load_data_frames(
    frvrs_logs_df='frvrs_logs_df',
    category_history_df='category_history_df',
)
frvrs_logs_df = data_frames_dict['frvrs_logs_df']

# The recorded run of this cell printed (829116, 123)
print(frvrs_logs_df.shape)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/category_history_df.pkl.
(829116, 123)


In [6]:

# Names of the injury and patient columns that get one-hot encoded and
# handed to the triage priority models as their inputs
input_features = [
    'injury_id',
    'injury_severity',
    'injury_required_procedure',
    'patient_salt',
    'patient_sort',
    'patient_pulse',
    'patient_breath',
    'patient_hearing',
    'patient_mood',
    'patient_pose',
]

def one_hot_encode(df, columns):
    '''
    One-hot encodes the given columns in the given DataFrame.
    
    Each column named in columns is replaced by indicator columns named
    <column>_<value>, plus a <column>_nan indicator for missing values
    (because dummy_na=True). Columns of df that are not listed are kept
    as they are. The DataFrame passed in is not modified.
    
    Args:
        df: A DataFrame.
        columns: A list of column names to encode.
    
    Returns:
        A new DataFrame with the listed columns replaced by their
        indicator columns.
    '''
    
    # Pass columns= explicitly: by default get_dummies only encodes object, string
    # and category columns, so a numeric column would pass through into dummies
    # under its original name and then be removed by the drop below without ever
    # having been encoded
    dummies = pd.get_dummies(df[columns], columns=columns, dummy_na=True)
    
    # Append the indicator columns and remove the raw columns they replace
    df = concat([df, dummies], axis='columns').drop(columns, axis='columns')
    
    return df

In [7]:

# One-hot encode the input features columns in the one-hot encode data frame
# Call re.compile explicitly: a bare compile() resolves to the built-in
# compile(source, filename, mode), which raises a TypeError for a lone pattern
ascii_regex = re.compile('[^a-z0-9]+')
one_hot_encode_df = one_hot_encode(frvrs_logs_df[input_features], input_features)

# Normalize each column name: lowercase it, collapse every run of characters
# outside a-z and 0-9 into one underscore, and trim underscores from both ends
one_hot_encode_df = one_hot_encode_df.rename(
    columns={cn: ascii_regex.sub('_', cn.lower()).strip('_') for cn in one_hot_encode_df.columns}
)

# Collect the indicator columns whose names end in _null or _nan
columns_list = [cn for cn in one_hot_encode_df.columns if cn.endswith(('_null', '_nan'))]
print(one_hot_encode_df.shape)
print(columns_list)

# Preview up to 18 random rows, transposed so each encoded column becomes a row,
# then show up to 20 of those rows sorted by column name
df = one_hot_encode_df.sample(min(18, one_hot_encode_df.shape[0])).dropna(axis='columns', how='all').T
display(df.sample(min(20, df.shape[0])).sort_index())

(829116, 78)
['injury_id_nan', 'injury_severity_nan', 'injury_required_procedure_nan', 'patient_salt_nan', 'patient_sort_nan', 'patient_pulse_nan', 'patient_breath_nan', 'patient_hearing_nan', 'patient_mood_nan', 'patient_pose_nan']


Unnamed: 0,758174,276597,138634,654396,806842,448342,789905,545040,470856,727522,187470,447242,534192,110967,675499,329203,581509,364808
injury_id_l_calf_laceration,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_id_l_shin_amputation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_id_l_wrist_amputation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_id_r_shoulder_puncture,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_id_r_thigh_laceration,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_id_r_wrist_amputation,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_required_procedure_decompress,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
injury_required_procedure_nan,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
patient_breath_none,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
patient_hearing_limited,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:

# Load the two stored triage priority models, in the same order as before
lr_model, dtr_model = (
    nu.load_object(model_name)
    for model_name in ('lr_triage_priority_model', 'dtr_triage_priority_model')
)

In [10]:

# Add prediction columns
# Both columns are recomputed whenever either one is missing from the logs data frame
if ('lr_triage_priority_model_prediction' not in frvrs_logs_df.columns) or ('dtr_triage_priority_model_prediction' not in frvrs_logs_df.columns):
    
    # Score every row with one predict call per model instead of looping over the rows.
    # one_hot_encode_df was built from frvrs_logs_df, so its rows are in the same order;
    # assigning the resulting arrays is positional, which stays correct even when the
    # index is not a default 0..n-1 range (the old loop used index labels as positions).
    # pandas raises a ValueError if the two frames ever differ in length.
    # NOTE(review): the feature matrix uses the column order of one_hot_encode_df, as
    # before - confirm it matches the order the models were fitted with
    input_features_array = one_hot_encode_df.values
    frvrs_logs_df['lr_triage_priority_model_prediction'] = lr_model.predict(input_features_array)
    frvrs_logs_df['dtr_triage_priority_model_prediction'] = dtr_model.predict(input_features_array)
    
    # Persist the augmented logs data frame
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    nu.save_data_frames(frvrs_logs_df=frvrs_logs_df)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/frvrs_logs_df.csv


In [None]:

# Pull the category history data frame out of the dictionary returned by
# nu.load_data_frames in the loading cell, and report its (rows, columns) shape
category_history_df = data_frames_dict['category_history_df']
print(category_history_df.shape)


## Maintenance

In [None]:

# What encodings are missing from the training data?
spreadsheet_1hot_columns = nu.load_object('spreadsheet_1hot_columns')

# Sort the set differences: the iteration order of a set of strings can change from
# one interpreter run to the next, so sorting keeps the printed and stored lists stable
print(sorted(set(spreadsheet_1hot_columns) - set(one_hot_encode_df.columns)))

# Reload both models so this cell can be run without the earlier loading cell
lr_model = nu.load_object('lr_triage_priority_model')
dtr_model = nu.load_object('dtr_triage_priority_model')

# Encoded columns present in the logs data that the model was not fitted with
extra_1hot_columns = sorted(set(one_hot_encode_df.columns) - set(lr_model.feature_names_in_.tolist()))
print(extra_1hot_columns)
nu.store_objects(extra_1hot_columns=extra_1hot_columns)

In [None]:

# Show the distinct input-feature combinations logged for rows whose patient_mood is 'dead'
mask_series = frvrs_logs_df['patient_mood'] == 'dead'
df = frvrs_logs_df.loc[mask_series, input_features]
df.drop_duplicates()