In [1]:

%pprint
import sys
if ('../py' not in sys.path): sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

from FRVRS import nu, fu
from pandas import DataFrame, Series
from itertools import product
from pysan.statistics import get_turbulence
from pysan import get_entropy, get_complexity

In [3]:

# load data frames
data_frames_list = nu.load_data_frames(metrics_evaluation_open_world_df='')
logs_df = data_frames_list['metrics_evaluation_open_world_df']
print(logs_df.shape) # (66069, 109)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl.
(276926, 109)


In [5]:

# Add back in the orientation scenes and other scenes not in our poster (which doubles the size of the sample)
if nu.pickle_exists('metrics_evaluation_open_world_distance_delta_df'): distance_delta_df = nu.load_object('metrics_evaluation_open_world_distance_delta_df')
else:
    distance_delta_df = fu.get_distance_deltas_data_frame(logs_df)
    
    # Add the agony column
    if 'has_patient_in_agony' not in distance_delta_df.columns:
        distance_delta_df['has_patient_in_agony'] = False
        for (session_uuid, scene_id), idx_df in distance_delta_df.groupby(fu.scene_groupby_columns):
            
            # Get the whole scene history
            mask_series = True
            for cn in fu.scene_groupby_columns: mask_series &= (logs_df[cn] == eval(cn))
            scene_df = logs_df[mask_series]
            
            # Get whether any patient in the scene is in agony
            mask_series = False
            for cn in fu.mood_columns_list: mask_series |= (scene_df[cn] == 'agony')
            
            # Mark the scene in distance delta as agonistic
            if mask_series.any(): distance_delta_df.loc[idx_df.index, 'has_patient_in_agony'] = True
    
    # Add the patient count column
    if 'patient_count' not in distance_delta_df.columns:
        import numpy as np
        distance_delta_df['patient_count'] = np.nan
        for (session_uuid, scene_id), idx_df in distance_delta_df.groupby(fu.scene_groupby_columns):
            
            # Get the whole scene history
            mask_series = True
            for cn in fu.scene_groupby_columns: mask_series &= (logs_df[cn] == eval(cn))
            scene_df = logs_df[mask_series]
            
            # Get patient_count
            distance_delta_df.loc[idx_df.index, 'patient_count'] = fu.get_patient_count(scene_df)
    
    if 'cluster_label' not in distance_delta_df.columns:
        from sklearn.cluster import DBSCAN
        columns_list = ['actual_engagement_distance', 'ideal_engagement_distance']
        X = distance_delta_df[columns_list].values
        
        # Set appropriate parameters for DBSCAN based on what gives 4 clusters
        dbscan = DBSCAN(eps=5, min_samples=1)
        dbscan.fit(X)
        
        # Get cluster labels for each data point
        distance_delta_df['cluster_label'] = dbscan.labels_
    
    nu.store_objects(metrics_evaluation_open_world_distance_delta_df=distance_delta_df)
    nu.save_data_frames(metrics_evaluation_open_world_distance_delta_df=distance_delta_df)
print(distance_delta_df.shape) # (43, 15)
print(sorted(distance_delta_df.columns))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_distance_delta_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_distance_delta_df.csv
(133, 15)
['actual_distracted_delta', 'actual_engagement_distance', 'actual_ideal_delta', 'adherence_to_salt', 'cluster_label', 'distracted_engagement_distance', 'has_patient_in_agony', 'ideal_engagement_distance', 'last_still_engagement', 'measure_of_distracted_ordering', 'measure_of_ideal_ordering', 'measure_of_right_ordering', 'patient_count', 'scene_id', 'session_uuid']



# Scene Stats Created for Metrics Evaluation Open World

In [6]:

if nu.pickle_exists('metrics_evaluation_open_world_scene_stats_df'): scene_stats_df = nu.load_object('metrics_evaluation_open_world_scene_stats_df')
else:
    
    # Get the combo alphabet list
    combos_alphabet = sorted(set([sequence_tuple[0] + '-' + sequence_tuple[1] for sequence_tuple in product(fu.patient_sort_order, fu.injury_severity_order)]))
    
    rows_list = []
    engagment_columns_list = ['patient_id', 'engagement_start', 'location_tuple', 'patient_sort', 'predicted_priority', 'injury_severity']
    for (session_uuid, scene_id), idx_df in distance_delta_df.groupby(fu.scene_groupby_columns):
        row_dict = list(idx_df.T.to_dict().values())[0]
        
        # Get the whole scene history
        mask_series = True
        for cn in fu.scene_groupby_columns: mask_series &= (logs_df[cn] == eval(cn))
        scene_df = logs_df[mask_series]
        
        # Get the engagement sequence and the stats from that
        actual_engagement_order = fu.get_actual_engagement_order(scene_df, verbose=False)
        if actual_engagement_order:
            df = DataFrame(actual_engagement_order, columns=engagment_columns_list)
            patient_sorts_list = df.apply(lambda row_series: row_series.patient_sort, axis='columns').tolist()
            patient_sorts_sequence, patient_sorts_string_to_integer_map = nu.convert_strings_to_integers(patient_sorts_list, alphabet_list=fu.patient_sort_order)
            injury_severitys_list = df.apply(lambda row_series: row_series.injury_severity, axis='columns').tolist()
            injury_severitys_sequence, injury_severitys_string_to_integer_map = nu.convert_strings_to_integers(
                injury_severitys_list, alphabet_list=fu.injury_severity_order
            )
            combos_list = [str(patient_sort) + '-' + str(injury_severity) for patient_sort, injury_severity in zip(patient_sorts_list, injury_severitys_list)]
            combos_sequence, combos_string_to_integer_map = nu.convert_strings_to_integers(combos_list, alphabet_list=combos_alphabet)
            
            # Replace negative values with 9s to maintain character width
            patient_sorts_sequence[patient_sorts_sequence < 0] = 9
            injury_severitys_sequence[injury_severitys_sequence < 0] = 9
            combos_sequence[combos_sequence < 0] = 9
            
            for fn in [get_turbulence, get_entropy, get_complexity]:
                function_name = fn.__name__
                for cn in ['patient_sort', 'injury_severity', 'combo']:
                    dictionary_key = function_name.replace('get', cn)
                    variable_name = f'{cn}s_sequence'
                    try:
                        dictionary_value = fn(list(eval(variable_name)))
                        row_dict[dictionary_key] = dictionary_value
                    except: continue
        
        row_dict['first_engagement'] = fu.get_first_engagement(scene_df)
        row_dict['first_treatment'] = fu.get_first_treatment(scene_df)
        row_dict['injury_correctly_treated_count'] = fu.get_injury_correctly_treated_count(scene_df)
        row_dict['injury_not_treated_count'] = fu.get_injury_not_treated_count(scene_df)
        row_dict['injury_treated_count'] = fu.get_injury_treatments_count(scene_df)
        row_dict['injury_wrongly_treated_count'] = fu.get_injury_wrongly_treated_count(scene_df)
        row_dict['is_scene_aborted'] = fu.get_is_scene_aborted(scene_df)
        row_dict['last_engagement'] = fu.get_last_engagement(scene_df)
        row_dict['logger_version'] = fu.get_logger_version(scene_df)
        row_dict['measure_of_right_ordering'] = fu.get_measure_of_right_ordering(scene_df)
        row_dict['patient_count'] = fu.get_patient_count(scene_df)
        row_dict['percent_hemorrhage_controlled'] = fu.get_percent_hemorrhage_controlled(scene_df)
        row_dict['pulse_taken_count'] = fu.get_pulse_taken_count(scene_df)
        row_dict['scene_end'] = fu.get_scene_end(scene_df)
        row_dict['scene_start'] = fu.get_scene_start(scene_df)
        row_dict['scene_type'] = fu.get_scene_type(scene_df)
        row_dict['stills_value'] = fu.get_stills_value(scene_df)
        row_dict['teleport_count'] = fu.get_teleport_count(scene_df)
        row_dict['time_to_last_hemorrhage_controlled'] = fu.get_time_to_last_hemorrhage_controlled(scene_df)
        row_dict['total_actions'] = fu.get_total_actions(scene_df)
        row_dict['triage_time'] = fu.get_triage_time(scene_df)
        row_dict['voice_capture_count'] = fu.get_voice_capture_count(scene_df)
        row_dict['walk_command_count'] = fu.get_walk_command_count(scene_df)
        row_dict['walk_value'] = fu.get_walk_value(scene_df)
        row_dict['walkers_value'] = fu.get_walkers_value(scene_df)
        row_dict['wave_command_count'] = fu.get_wave_command_count(scene_df)
        row_dict['wave_value'] = fu.get_wave_value(scene_df)
        rows_list.append(row_dict)
    scene_stats_df = DataFrame(rows_list)
    nu.store_objects(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    nu.save_data_frames(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
print(scene_stats_df.shape) # (43, 49)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_scene_stats_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_scene_stats_df.csv
(133, 49)


In [7]:

# Check if all the patient IDs in any run are some variant of Mike and designate those runs as "Orientation"
new_column_name = 'scene_type'
if (new_column_name in scene_stats_df.columns): scene_stats_df = scene_stats_df.drop(columns=new_column_name)
if (new_column_name not in scene_stats_df.columns): scene_stats_df[new_column_name] = 'Triage'
column_value = 'Orientation'
if (column_value not in scene_stats_df.scene_type):
    
    # Filter out those files from the dataset and mark them
    for (session_uuid, scene_id), scene_df in logs_df.groupby(fu.scene_groupby_columns):
        patients_list = sorted(scene_df[~scene_df.patient_id.isnull()].patient_id.unique())
        is_mike_series = Series(patients_list).map(lambda x: 'mike' in str(x).lower())
        if is_mike_series.all():
            mask_series = True
            for cn in fu.scene_groupby_columns: mask_series &= (scene_stats_df[cn] == eval(cn))
            scene_stats_df.loc[mask_series, new_column_name] = column_value
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    nu.save_data_frames(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    print(scene_stats_df.shape) # (43, 49)
    
display(scene_stats_df.groupby(['patient_count', 'is_scene_aborted', new_column_name]).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_scene_stats_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_scene_stats_df.csv
(133, 49)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,record_count
patient_count,is_scene_aborted,scene_type,Unnamed: 3_level_1
0,False,Orientation,1
0,True,Orientation,31
1,True,Triage,3
2,False,Triage,7
2,True,Triage,1
3,False,Triage,6
4,False,Triage,5
4,True,Triage,1
5,False,Triage,9
6,False,Triage,8


In [8]:

mask_series = ~scene_df.patient_id.isnull()
df = scene_df[mask_series]
if df.shape[0]: display(df.sample(min(df.shape[0], 6)).dropna(axis='columns', how='all').T)

Unnamed: 0,213217,213426,213293,214326,213746,213608
action_type,INJURY_RECORD,PLAYER_GAZE,PLAYER_GAZE,PULSE_TAKEN,PLAYER_GAZE,PLAYER_GAZE
action_tick,122234,156263,131490,451183,209087,189013
event_time,2024-03-20 10:26:23,2024-03-20 10:26:57,2024-03-20 10:26:33,2024-03-20 10:31:52,2024-03-20 10:27:50,2024-03-20 10:27:30
session_uuid,fc676b00-9559-467d-adbb-696dd6e9fb08,fc676b00-9559-467d-adbb-696dd6e9fb08,fc676b00-9559-467d-adbb-696dd6e9fb08,fc676b00-9559-467d-adbb-696dd6e9fb08,fc676b00-9559-467d-adbb-696dd6e9fb08,fc676b00-9559-467d-adbb-696dd6e9fb08
file_name,Metrics Evaluation Open World/ITM 3.20.2024 40...,Metrics Evaluation Open World/ITM 3.20.2024 40...,Metrics Evaluation Open World/ITM 3.20.2024 40...,Metrics Evaluation Open World/ITM 3.20.2024 40...,Metrics Evaluation Open World/ITM 3.20.2024 40...,Metrics Evaluation Open World/ITM 3.20.2024 40...
logger_version,1.4,1.4,1.4,1.4,1.4,1.4
injury_record_id,R Calf Laceration,,,,,
injury_record_patient_id,Open World Civilian 2 Female Root,,,,,
injury_record_required_procedure,gauzePressure,,,,,
injury_record_severity,low,,,,,


In [9]:

# Any runs longer than that 16 minutes are probably an instance
# of someone taking off the headset and setting it on the ground.
# 1 second = 1,000 milliseconds; 1 minute = 60 seconds
new_column_name = 'is_scene_aborted'
# if (new_column_name in logs_df.columns): logs_df = logs_df.drop(columns=new_column_name)
if (new_column_name not in scene_stats_df.columns):
    scene_stats_df[new_column_name] = False
    for (session_uuid, scene_id), scene_df in logs_df.groupby(fu.scene_groupby_columns):
        mask_series = True
        for cn in fu.scene_groupby_columns: mask_series &= (scene_stats_df[cn] == eval(cn))
        scene_stats_df.loc[mask_series, new_column_name] = fu.get_is_scene_aborted(scene_df)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    nu.save_data_frames(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    
    print(scene_stats_df.shape) # (880, 59)
display(scene_stats_df.groupby(new_column_name).size().to_frame().rename(columns={0: 'record_count'}))

Unnamed: 0_level_0,record_count
is_scene_aborted,Unnamed: 1_level_1
False,74
True,59



# Verification

In [10]:

# Get them set up as if we were going to run triage accuracy and time to hemorrhage control stats on them
# I know they are small samples, I primarily want you to let us know what information is missing for you to look at those things.
# I am also interested in variability around each patient. Are you able to segregate the files by patient and tell me something about
# how different participants acted (assess, treat, tag) in response to each patient?
# I also want to extract from each csv a list of the actions the responder did (engage, assess, treat, and tag actions, not movement or others)
# so that I can see a list of what they did in the scenario (this will eventually form the basis of delegation decisions).
# Basically I would like you to play with this small sample in anticipation of receiving a similar, larger dataset from these new scenarios.
# Let me know if you have questions but also please be patient as this is low priority compared to evaluation preparation this week.
import os
import glob
import os.path as osp

data_frames_list = nu.load_data_frames(verbose=False, metrics_evaluation_open_world_scene_stats_df='', metrics_evaluation_open_world_file_stats_df='')
file_stats_df = data_frames_list['metrics_evaluation_open_world_file_stats_df']
logs_folder = '../data/logs'
pattern = osp.join(logs_folder, 'Disaster Day 3.6.2024 ITM Files 405*')
csv_count = 0
for logs_path in glob.glob(pattern):
    csv_count += len([fn for sub_directory, directories_list, files_list in os.walk(logs_path) for fn in files_list if fn.lower().endswith('.csv')])
uuids_count = file_stats_df.session_uuid.unique().shape[0]
elevens_mask = (scene_stats_df.patient_count >= 11) & (scene_stats_df.is_scene_aborted == False)
scene_type_dict = scene_stats_df[elevens_mask].groupby('scene_type').size().to_dict()
orientation_count = scene_type_dict['Orientation'] if 'Orientation' in scene_type_dict else 0
triage_count = scene_type_dict['Triage'] if 'Triage' in scene_type_dict else 0
aborted_count = scene_stats_df[scene_stats_df.is_scene_aborted].shape[0]
aborted_suffix = 's' if (aborted_count != 1) else ''
one_triage_dict = file_stats_df.groupby('is_a_one_triage_file').size().to_dict()
print(f'''Okay, I have ingested Disaster day 3.6.2024 ITM files 405F.zip and Disaster Day 3.6.2024 ITM Files 405E.zip and have verified that:
•	There are {csv_count} CSV files in the zip file.
•	There are {uuids_count} unique session UUIDs in there.
•	There are {orientation_count} 11-patient-or-above Orientation scenes and {triage_count} Triage scenes in there.
•	The time difference between when the scene starts and the last engagement for {aborted_count} scene{aborted_suffix} is longer than 16 minutes.
•	There are {one_triage_dict.get(True, 0)} files that have one and only one triage scene in them. (The other {one_triage_dict.get(False, 0)}''')
print(''' are not one-triage-scene files).
If any of these is unexpected, please explain why.
''')
mask_series = (scene_stats_df.scene_type == 'Orientation') & elevens_mask
uuids_list = sorted(scene_stats_df[mask_series].session_uuid.unique())
mask_series = file_stats_df.session_uuid.isin(uuids_list)
if mask_series.any():
    print(
        f'P.S. The files with orientation scenes in them are:'
    )
    for file_name in sorted(file_stats_df[mask_series].session_file_name): print(file_name)

KeyError: 'is_a_one_triage_file'

In [12]:

from IPython.display import HTML

if 'is_in_registry' in file_stats_df:
    registry_uuids = sorted(file_stats_df[file_stats_df.is_in_registry].session_uuid.unique())
    print(f'Of the {len(registry_uuids)} files in the registry, ', end='')
    mask_series = scene_stats_df.session_uuid.isin(registry_uuids) & (scene_stats_df.scene_type == 'Triage')
    print(f'of the {scene_stats_df[mask_series].shape[0]} triage scenes in those files, this is the counts of the values of responder type:')
    display(HTML(file_stats_df.responder_type.value_counts().to_frame().to_html()))

Of the 0 files in the registry, of the 0 triage scenes in those files, this is the counts of the values of responder type:


Unnamed: 0,responder_type
EM-RES1,22


In [13]:

display(HTML(scene_stats_df.groupby(['patient_count', 'scene_type']).size().to_frame().rename(columns={0: 'record_count'}).reset_index(drop=False).to_html()))

Unnamed: 0,patient_count,scene_type,record_count
0,0,Orientation,9
1,2,Triage,4
2,3,Triage,2
3,4,Triage,6
4,5,Triage,11
5,6,Triage,10
6,9,Triage,1


In [14]:

mask_series = (scene_stats_df.scene_type == 'Orientation')
df = scene_stats_df[mask_series].dropna(axis='columns', how='all')
display(df.T)
if 'session_uuid' in df.columns:
    session_uuids_list = sorted(df.session_uuid.unique())
    mask_series = file_stats_df.session_uuid.isin(session_uuids_list)
    df = file_stats_df[mask_series].dropna(axis='columns', how='all')
    display(df.T)

Unnamed: 0,1,6,19,26,28,32,34,39,42
session_uuid,156692d8-97ab-4631-a64c-36c3a57bd506,23c74759-5061-445f-9f37-c003c62c3a5f,55850e8a-bff2-466a-a75b-87cce5bd0599,7b466ec9-78ca-43b1-8fbd-aec32796176c,8cb0ca8f-b626-4bc7-bfd5-1f44770e088a,a63bb859-5d18-421c-b52d-2d394bce589c,aa28f824-cd70-4d66-a843-64a1f3636485,c3f2165f-4878-492c-8644-6ae27832eef8,d31e1fd0-e9c7-4a76-bb7a-d5e786de2761
scene_id,1,3,2,1,1,2,1,1,2
actual_engagement_distance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ideal_engagement_distance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
distracted_engagement_distance,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
actual_ideal_delta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
actual_distracted_delta,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
adherence_to_salt,False,False,False,False,False,False,False,False,False
has_patient_in_agony,False,False,False,False,False,False,False,False,False
patient_count,0,0,0,0,0,0,0,0,0


Unnamed: 0,5,34,52,64,76,81,87,110,116
player_location_left_hand_location,"(0.0, 0.0, 0.0)",,"(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)",
logger_version,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4,1.4
session_uuid,23c74759-5061-445f-9f37-c003c62c3a5f,55850e8a-bff2-466a-a75b-87cce5bd0599,8cb0ca8f-b626-4bc7-bfd5-1f44770e088a,a63bb859-5d18-421c-b52d-2d394bce589c,c3f2165f-4878-492c-8644-6ae27832eef8,d31e1fd0-e9c7-4a76-bb7a-d5e786de2761,156692d8-97ab-4631-a64c-36c3a57bd506,7b466ec9-78ca-43b1-8fbd-aec32796176c,aa28f824-cd70-4d66-a843-64a1f3636485
player_location_right_hand_location,"(0.0, 0.0, 0.0)",,"(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)","(0.0, 0.0, 0.0)",
file_name,Disaster Day 3.6.2024 ITM Files 405E/23c74759-...,Disaster Day 3.6.2024 ITM Files 405E/55850e8a-...,Disaster Day 3.6.2024 ITM Files 405E/8cb0ca8f-...,Disaster Day 3.6.2024 ITM Files 405E/a63bb859-...,Disaster Day 3.6.2024 ITM Files 405E/c3f2165f-...,Disaster Day 3.6.2024 ITM Files 405E/d31e1fd0-...,Disaster day 3.6.2024 ITM files 405F/156692d8-...,Disaster day 3.6.2024 ITM files 405F/7b466ec9-...,Disaster day 3.6.2024 ITM files 405F/aa28f824-...
session_file_date,2024-03-06 00:00:00,2024-03-06 00:00:00,2024-03-06 00:00:00,2024-03-06 00:00:00,2024-03-06 00:00:00,2024-03-05 00:00:00,2024-03-06 00:00:00,2024-03-06 00:00:00,2024-03-06 00:00:00
session_file_name,23c74759-5061-445f-9f37-c003c62c3a5f.csv,55850e8a-bff2-466a-a75b-87cce5bd0599.csv,8cb0ca8f-b626-4bc7-bfd5-1f44770e088a.csv,a63bb859-5d18-421c-b52d-2d394bce589c.csv,c3f2165f-4878-492c-8644-6ae27832eef8.csv,d31e1fd0-e9c7-4a76-bb7a-d5e786de2761.csv,156692d8-97ab-4631-a64c-36c3a57bd506.csv,7b466ec9-78ca-43b1-8fbd-aec32796176c.csv,aa28f824-cd70-4d66-a843-64a1f3636485.csv
responder_type,EM-RES1,EM-RES1,EM-RES1,EM-RES1,EM-RES1,EM-RES1,EM-RES1,EM-RES1,EM-RES1
site_name,OSU,OSU,OSU,OSU,OSU,OSU,OSU,OSU,OSU
encounter_layout,Submarine,Urban,Urban,Submarine,Discard,Submarine,Submarine,Urban,Urban



----