In [1]:

%pprint
import sys
if (osp.join('..', 'py') not in sys.path): sys.path.insert(1, osp.join('..', 'py'))

Pretty printing has been turned OFF


In [2]:

from FRVRS import (nu, fu, DataFrame, to_datetime)
from itertools import product
from pysan.statistics import get_turbulence
from pysan import get_entropy, get_complexity

In [3]:

# load data frames
data_frames_dict = nu.load_data_frames(frvrs_logs_df='frvrs_logs_df', distance_delta_df='distance_delta_df')
frvrs_logs_df = data_frames_dict['frvrs_logs_df']
print(frvrs_logs_df.shape) # (829116, 120)
distance_delta_df = data_frames_dict['distance_delta_df']
print(distance_delta_df.shape) # (873, 15)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/distance_delta_df.pkl.
(829116, 120)
(873, 15)



# Create a Data Frame of all the Scene Stats

In [None]:

def add_scene_columns_to_row(scene_df, row_dict):
    row_dict['first_engagement'] = fu.get_first_engagement(scene_df)
    row_dict['first_treatment'] = fu.get_first_treatment(scene_df)
    row_dict['injury_correctly_treated_count'] = fu.get_injury_correctly_treated_count(scene_df)
    row_dict['injury_not_treated_count'] = fu.get_injury_not_treated_count(scene_df)
    row_dict['injury_treated_count'] = fu.get_injury_treatments_count(scene_df)
    row_dict['injury_wrongly_treated_count'] = fu.get_injury_wrongly_treated_count(scene_df)
    row_dict['is_a_one_triage_file'] = fu.get_is_a_one_triage_file(scene_df)
    row_dict['is_scene_aborted'] = fu.get_is_scene_aborted(scene_df)
    row_dict['last_engagement'] = fu.get_last_engagement(scene_df)
    row_dict['logger_version'] = fu.get_logger_version(scene_df)
    row_dict['measure_of_right_ordering'] = fu.get_measure_of_right_ordering(scene_df)
    row_dict['patient_count'] = fu.get_patient_count(scene_df)
    row_dict['percent_hemorrhage_controlled'] = fu.get_percent_hemorrhage_controlled(scene_df)
    row_dict['pulse_taken_count'] = fu.get_pulse_taken_count(scene_df)
    row_dict['scene_end'] = fu.get_scene_end(scene_df)
    row_dict['scene_start'] = fu.get_scene_start(scene_df)
    row_dict['scene_type'] = fu.get_scene_type(scene_df)
    row_dict['stills_value'] = fu.get_stills_value(scene_df)
    row_dict['teleport_count'] = fu.get_teleport_count(scene_df)
    row_dict['time_to_last_hemorrhage_controlled'] = fu.get_time_to_last_hemorrhage_controlled(scene_df)
    row_dict['total_actions'] = fu.get_total_actions_count(scene_df)
    row_dict['triage_time'] = fu.get_triage_time(scene_df)
    row_dict['voice_capture_count'] = fu.get_voice_capture_count(scene_df)
    row_dict['walk_command_count'] = fu.get_walk_command_count(scene_df)
    row_dict['walk_value'] = fu.get_walk_value(scene_df)
    row_dict['walkers_value'] = fu.get_walkers_value(scene_df)
    row_dict['wave_command_count'] = fu.get_wave_command_count(scene_df)
    row_dict['wave_value'] = fu.get_wave_value(scene_df)

    return row_dict

In [4]:

if nu.pickle_exists('scene_stats_df'): scene_stats_df = nu.load_object('scene_stats_df')
else:
    
    # Get the combo alphabet list
    combos_alphabet = sorted(set([sequence_tuple[0] + '-' + sequence_tuple[1] for sequence_tuple in product(
        fu.patient_sort_order, fu.injury_severity_order
    )]))
    
    rows_list = []
    engagment_columns_list = ['patient_id', 'engagement_start', 'location_tuple', 'patient_sort', 'predicted_priority', 'injury_severity']
    for (session_uuid, scene_id), idx_df in distance_delta_df.groupby(fu.scene_groupby_columns):
        row_dict = list(idx_df.T.to_dict().values())[0]
        
        # Get the whole scene history
        mask_series = True
        for cn in fu.scene_groupby_columns: mask_series &= (frvrs_logs_df[cn] == eval(cn))
        scene_df = frvrs_logs_df[mask_series]
        
        # Get the engagement sequence and the stats from that
        actual_engagement_order = fu.get_order_of_actual_engagement(scene_df, verbose=False)
        if actual_engagement_order:
            df = DataFrame(actual_engagement_order, columns=engagment_columns_list)
            patient_sorts_list = df.apply(lambda row_series: row_series.patient_sort, axis='columns').tolist()
            patient_sorts_sequence, patient_sorts_string_to_integer_map = nu.convert_strings_to_integers(
                patient_sorts_list, alphabet_list=fu.patient_sort_order
            )
            injury_severitys_list = df.apply(lambda row_series: row_series.injury_severity, axis='columns').tolist()
            injury_severitys_sequence, injury_severitys_string_to_integer_map = nu.convert_strings_to_integers(
                injury_severitys_list, alphabet_list=fu.injury_severity_order
            )
            combos_list = [str(patient_sort) + '-' + str(injury_severity) for patient_sort, injury_severity in zip(
                patient_sorts_list, injury_severitys_list
            )]
            combos_sequence, combos_string_to_integer_map = nu.convert_strings_to_integers(combos_list, alphabet_list=combos_alphabet)
            
            # Replace negative values with 9s to maintain character width
            patient_sorts_sequence[patient_sorts_sequence < 0] = 9
            injury_severitys_sequence[injury_severitys_sequence < 0] = 9
            combos_sequence[combos_sequence < 0] = 9
            
            for fn in [get_turbulence, get_entropy, get_complexity]:
                function_name = fn.__name__
                for cn in ['patient_sort', 'injury_severity', 'combo']:
                    dictionary_key = function_name.replace('get', cn)
                    variable_name = f'{cn}s_sequence'
                    try:
                        dictionary_value = fn(list(eval(variable_name)))
                        row_dict[dictionary_key] = dictionary_value
                    except: continue
        
        row_dict = add_scene_columns_to_row(scene_df, row_dict)
        rows_list.append(row_dict)
    scene_stats_df = DataFrame(rows_list)
    nu.store_objects(scene_stats_df=scene_stats_df)
    nu.save_data_frames(scene_stats_df=scene_stats_df)
print(scene_stats_df.shape) # (880, 59)

(880, 59)


In [5]:

# Merge file stats with the scene stats
if 'encounter_layout' not in scene_stats_df.columns:
    data_frames_dict = nu.load_data_frames(file_stats_df='file_stats_df')
    file_stats_df = data_frames_dict['file_stats_df']
    on_list = list(set(file_stats_df.columns).intersection(set(scene_stats_df.columns)))
    scene_stats_df = scene_stats_df.merge(file_stats_df, how='left', on=on_list).drop_duplicates().reset_index(drop=True)
    mask_series = scene_stats_df.responder_type.isnull()
    scene_stats_df.loc[mask_series, 'responder_type'] = 'Unknown'
    mask_series = scene_stats_df.site_name.isnull()
    scene_stats_df.loc[mask_series, 'site_name'] = 'Unknown'
    mask_series = scene_stats_df.encounter_layout.isnull()
    scene_stats_df.loc[mask_series, 'encounter_layout'] = 'Unknown'
    nu.store_objects(scene_stats_df=scene_stats_df)
    nu.save_data_frames(scene_stats_df=scene_stats_df)

In [6]:

# Fix the null file dates
mask_series = scene_stats_df.session_file_date.isnull()
if mask_series.any():
    print(f'I have {mask_series.sum()} scenes in my stats data frame without file dates.')
    data_frames_dict = nu.load_data_frames(frvrs_logs_df='frvrs_logs_df')
    frvrs_logs_df = data_frames_dict['frvrs_logs_df']
    for session_uuid, idx_df in scene_stats_df[mask_series].groupby('session_uuid'):
        
        # Get the whole session history
        mask_series = (frvrs_logs_df.session_uuid == session_uuid)
        session_df = frvrs_logs_df[mask_series]
        
        session_file_date = session_df.event_time.min().date()
        scene_stats_df.loc[idx_df.index, 'session_file_date'] = session_file_date
    nu.store_objects(scene_stats_df=scene_stats_df)
    nu.save_data_frames(scene_stats_df=scene_stats_df)
    mask_series = scene_stats_df.session_file_date.isnull()
    print(f'I now have {mask_series.sum()} scenes in my stats data frame without file dates.')

In [11]:

# Convert the session file date to a datetime64[ns]
if (str(scene_stats_df.session_file_date.dtypes) != 'datetime64[ns]'):
    scene_stats_df.session_file_date = to_datetime(scene_stats_df.session_file_date, infer_datetime_format=True)
    nu.store_objects(scene_stats_df=scene_stats_df)
    nu.save_data_frames(scene_stats_df=scene_stats_df)

In [12]:

# Check if all the patient IDs in any run are some variant of Mike and designate those runs as "Orientation"
new_column_name = 'scene_type'
if (new_column_name not in scene_stats_df.columns): scene_stats_df[new_column_name] = 'Triage'
column_value = 'Orientation'
if (column_value not in scene_stats_df.scene_type.unique()):
    
    # Filter out those files from the dataset and mark them
    base_mask_series = frvrs_logs_df.groupby(fu.scene_groupby_columns).patient_id.transform(lambda srs: all(
        srs.str.lower().str.contains('mike')
    ))
    session_uuids_list = frvrs_logs_df[base_mask_series].session_uuid.unique().tolist()
    mask_series = scene_stats_df.session_uuid.isin(session_uuids_list)
    scene_stats_df.loc[mask_series, new_column_name] = column_value
    
    # Store the results and show the new data frame shape
    nu.store_objects(scene_stats_df=scene_stats_df)
    nu.save_data_frames(scene_stats_df=scene_stats_df)
    print(scene_stats_df.shape) # (880, 59)
    
display(scene_stats_df.groupby(new_column_name).size().to_frame().rename(columns={0: 'record_count'}))

Unnamed: 0_level_0,record_count
scene_type,Unnamed: 1_level_1
Orientation,395
Triage,485


In [13]:

# Any runs longer than that 16 minutes are probably an instance
# of someone taking off the headset and setting it on the ground.
# 1 second = 1,000 milliseconds; 1 minute = 60 seconds
new_column_name = 'is_scene_aborted'
# if (new_column_name in frvrs_logs_df.columns): frvrs_logs_df = frvrs_logs_df.drop(columns=new_column_name)
if (new_column_name not in scene_stats_df.columns):
    scene_stats_df[new_column_name] = False
    for (session_uuid, scene_id), scene_df in frvrs_logs_df.groupby(fu.scene_groupby_columns):
        mask_series = True
        for cn in fu.scene_groupby_columns: mask_series &= (scene_stats_df[cn] == eval(cn))
        scene_stats_df.loc[mask_series, new_column_name] = fu.get_is_scene_aborted(scene_df)
    
    # Store the results and show the new data frame shape
    nu.store_objects(scene_stats_df=scene_stats_df)
    nu.save_data_frames(scene_stats_df=scene_stats_df)
    
    print(scene_stats_df.shape) # (880, 59)
display(scene_stats_df.groupby(new_column_name).size().to_frame().rename(columns={0: 'record_count'}))

Unnamed: 0_level_0,record_count
is_scene_aborted,Unnamed: 1_level_1
False,880


In [14]:

scene_stats_df.sample(8).T

Unnamed: 0,527,508,123,773,465,24,379,22
session_uuid,992d4672-7993-44ba-890c-0d8605ccefdd,9179223d-a6e0-4612-8200-40f30d05c11e,206da802-d38f-4f3a-98e3-44f99481479d,e3c2b4c6-d8b7-4b64-8cd0-7ca0e622f9b5,851741c0-56b3-4702-8fd2-c946376840fc,06574b6f-ab02-432c-9a65-7b031218a270,6b704e43-9ef4-4d5c-b39b-40c41648f8d3,06574b6f-ab02-432c-9a65-7b031218a270
scene_id,1,1,1,1,0,6,1,4
last_still_engagement,151731.0,104407.0,149250.0,285708.0,320116.0,71063.0,143923.0,63905.0
actual_engagement_distance,35.761345,36.702121,38.803266,39.818552,31.995616,44.717283,40.338421,34.253458
ideal_engagement_distance,40.741238,41.205982,41.363188,41.213144,28.18781,41.457958,40.998302,41.544918
measure_of_ideal_ordering,0.611431,0.42606,0.516149,0.542837,0.165626,0.676695,0.697932,0.444308
distracted_engagement_distance,29.55848,29.670197,31.403508,31.312498,28.419093,30.959859,30.075073,32.000526
measure_of_distracted_ordering,0.033897,0.224809,0.317356,0.066558,0.012796,0.302423,0.252135,0.103636
measure_of_right_ordering,0.905641,0.59524,0.813972,0.886881,0.411566,1.0,0.999171,0.731201
actual_ideal_delta,-4.979893,-4.503861,-2.559922,-1.394592,3.807806,3.259325,-0.659881,-7.29146
