In [1]:

# Set up the notebook
%pprint
import sys
if (osp.join('..', 'py') not in sys.path): sys.path.insert(1, osp.join('..', 'py'))

Pretty printing has been turned OFF


In [2]:

from FRVRS import (
    fu, nu, nan, isnan, listdir, makedirs, osp, remove, sep, walk, CategoricalDtype, DataFrame, Index, NaT, Series, concat, isna,
    notnull, read_csv, read_excel, read_pickle, to_datetime, math, np, re, subprocess, sys, warnings, pickle, display, to_numeric, csv, sm
)
import json

warnings.filterwarnings('ignore')


# Scene Stats Created for Metrics Evaluation Open World

In [3]:

# Fetch the open world CSV stats; the loader hands back a dictionary keyed by data frame name
data_frames_dict = nu.load_data_frames(
    metrics_evaluation_open_world_csv_stats_df=''
)
csv_stats_df = data_frames_dict['metrics_evaluation_open_world_csv_stats_df']

# Report (rows, columns); the last recorded run printed (199476, 124)
print(csv_stats_df.shape)

No pickle exists for metrics_evaluation_open_world_csv_stats_df - attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_csv_stats_df.csv.
(199476, 124)


In [None]:

# Add back in the orientation scenes and other scenes not in our poster (which doubles the size of the sample)
if nu.pickle_exists('metrics_evaluation_open_world_distance_delta_df'):
    
    # Reuse the previously stored data frame
    distance_delta_df = nu.load_data_frames(
        metrics_evaluation_open_world_distance_delta_df=''
    )['metrics_evaluation_open_world_distance_delta_df']
else:
    distance_delta_df = fu.get_distance_deltas_dataframe(csv_stats_df)
    
    # Initialize whichever per-scene columns are missing (agony first, which keeps the column order)
    needs_agony_column = 'has_patient_in_agony' not in distance_delta_df.columns
    if needs_agony_column: distance_delta_df['has_patient_in_agony'] = False
    needs_patient_count_column = 'patient_count' not in distance_delta_df.columns
    if needs_patient_count_column: distance_delta_df['patient_count'] = nan
    
    # Fill in the missing per-scene columns in a single pass over the scenes,
    # so the full-frame scene mask is only built once per scene
    if needs_agony_column or needs_patient_count_column:
        for (session_uuid, scene_id), idx_df in distance_delta_df.groupby(fu.scene_groupby_columns):
            
            # Get the whole scene history by pairing each groupby column with its
            # value in the group key (rather than eval-ing the column name)
            mask_series = True
            for cn, cv in zip(fu.scene_groupby_columns, (session_uuid, scene_id)):
                mask_series &= (csv_stats_df[cn] == cv)
            scene_df = csv_stats_df[mask_series]
            
            # Mark the scene in distance delta as agonistic if any patient in the scene is in agony;
            # any() over the columns also copes with an empty mood columns list
            if needs_agony_column and any((scene_df[cn] == 'agony').any() for cn in fu.mood_columns_list):
                distance_delta_df.loc[idx_df.index, 'has_patient_in_agony'] = True
            
            # Get patient_count
            if needs_patient_count_column:
                distance_delta_df.loc[idx_df.index, 'patient_count'] = fu.get_patient_count(scene_df)
    
    # Add the cluster label column
    if 'cluster_label' not in distance_delta_df.columns:
        from sklearn.cluster import DBSCAN
        columns_list = ['actual_engagement_distance']
        X = distance_delta_df[columns_list].values
        
        # Set appropriate parameters for DBSCAN based on what gives 4 clusters
        dbscan = DBSCAN(eps=5, min_samples=1)
        dbscan.fit(X)
        
        # Get cluster labels for each data point
        distance_delta_df['cluster_label'] = dbscan.labels_
    
    # Persist the result so the next run takes the pickle branch above
    nu.store_objects(metrics_evaluation_open_world_distance_delta_df=distance_delta_df)
    nu.save_data_frames(metrics_evaluation_open_world_distance_delta_df=distance_delta_df)
print(distance_delta_df.shape) # (133, 15)
print(sorted(distance_delta_df.columns))

In [None]:

def add_scene_columns_to_row(scene_df, row_dict):
    """
    Populate a row dictionary with the scalar scene values from the FRVRS utilities.
    
    Parameters:
        scene_df: The records of a single scene; it is passed unchanged to
            each ``fu.get_<column>`` helper.
        row_dict (dict): The dictionary to fill in. It is modified in place,
            and any same-named keys it already holds are overwritten.
    
    Returns:
        dict: The same ``row_dict`` object, holding one entry per scene column.
    """
    
    # Each column is computed by the FRVRS utility named get_<column>;
    # the order here is the order in which the keys are written
    scene_column_names = (
        'first_engagement', 'first_treatment', 'injury_correctly_treated_count', 'injury_not_treated_count',
        'injury_treatments_count', 'injury_wrongly_treated_count', 'is_scene_aborted', 'last_engagement',
        'patient_count', 'percent_hemorrhage_controlled', 'pulse_taken_count', 'scene_end', 'scene_start',
        'scene_type', 'stills_value', 'teleport_count', 'time_to_hemorrhage_control_per_patient',
        'time_to_last_hemorrhage_controlled', 'total_actions_count', 'triage_time', 'voice_capture_count',
        'walk_command_count', 'walk_value', 'walkers_value', 'wave_command_count', 'wave_value',
    )
    for column_name in scene_column_names:
        scene_function = getattr(fu, f'get_{column_name}')
        row_dict[column_name] = scene_function(scene_df)

    return row_dict

In [None]:

# Build one row of scene statistics per (session, scene) group in distance_delta_df
rows_list = []

# NOTE(review): this list is not read anywhere in this cell; it is kept (misspelled
# name included) in case a later cell relies on it -- confirm before removing
engagment_columns_list = ['patient_id', 'engagement_start', 'location_tuple', 'patient_sort', 'predicted_priority', 'injury_severity']
for (session_uuid, scene_id), idx_df in distance_delta_df.groupby(fu.scene_groupby_columns):
    
    # Seed the row with the first distance delta record of the group
    row_dict = list(idx_df.T.to_dict().values())[0]
    
    # Get the whole scene history by pairing each groupby column with its
    # value in the group key (rather than eval-ing the column name)
    mask_series = True
    for cn, cv in zip(fu.scene_groupby_columns, (session_uuid, scene_id)):
        mask_series &= (csv_stats_df[cn] == cv)
    
    # Scenes without any matching CSV stats keep only their distance delta values
    if mask_series.any():
        scene_df = csv_stats_df[mask_series]
        row_dict['participant_id'] = scene_df.participant_id.iloc[0]
        
        # Get all the FRVRS utils scalar scene values
        row_dict = add_scene_columns_to_row(scene_df, row_dict)
    
    rows_list.append(row_dict)

# Create the data frame once from the accumulated rows, then persist it
scene_stats_df = DataFrame(rows_list)
nu.store_objects(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
nu.save_data_frames(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
print(scene_stats_df.shape) # (43, 49)

In [None]:

# Check if all the patient IDs in any run are some variant of Mike and designate those runs as "Orientation"
new_column_name = 'scene_type'

# Start over with every scene labeled as Triage; dropping first moves the column to the end
if (new_column_name in scene_stats_df.columns): scene_stats_df = scene_stats_df.drop(columns=new_column_name)
scene_stats_df[new_column_name] = 'Triage'
column_value = 'Orientation'

# Test the column's values: the in operator applied to a Series looks at its index labels instead
if not scene_stats_df[new_column_name].eq(column_value).any():
    
    # Filter out those files from the dataset and mark them
    for (session_uuid, scene_id), scene_df in csv_stats_df.groupby(fu.scene_groupby_columns):
        
        # NOTE(review): a scene without any patient IDs also passes this test
        # (all() of nothing is True) and so gets marked as Orientation -- confirm that is intended
        patient_ids = scene_df.patient_id.dropna().unique()
        if all('mike' in str(patient_id).lower() for patient_id in patient_ids):
            
            # Locate the scene's rows by pairing each groupby column with its
            # value in the group key (rather than eval-ing the column name)
            mask_series = True
            for cn, cv in zip(fu.scene_groupby_columns, (session_uuid, scene_id)):
                mask_series &= (scene_stats_df[cn] == cv)
            scene_stats_df.loc[mask_series, new_column_name] = column_value
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    nu.save_data_frames(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    print(scene_stats_df.shape) # (76, 49)
    
display(scene_stats_df.groupby(['patient_count', 'is_scene_aborted', new_column_name]).size().to_frame().rename(columns={0: 'record_count'}))

In [None]:

# Any runs longer than 16 minutes are probably an instance
# of someone taking off the headset and setting it on the ground.
# 1 second = 1,000 milliseconds; 1 minute = 60 seconds
new_column_name = 'is_scene_aborted'

# Only compute the column if the scene stats do not already carry it
if (new_column_name not in scene_stats_df.columns):
    scene_stats_df[new_column_name] = False
    for (session_uuid, scene_id), scene_df in csv_stats_df.groupby(fu.scene_groupby_columns):
        
        # Locate the scene's rows by pairing each groupby column with its
        # value in the group key (rather than eval-ing the column name)
        mask_series = True
        for cn, cv in zip(fu.scene_groupby_columns, (session_uuid, scene_id)):
            mask_series &= (scene_stats_df[cn] == cv)
        scene_stats_df.loc[mask_series, new_column_name] = fu.get_is_scene_aborted(scene_df)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    nu.save_data_frames(metrics_evaluation_open_world_scene_stats_df=scene_stats_df)
    
    print(scene_stats_df.shape) # (880, 59)

display(scene_stats_df.groupby(new_column_name).size().to_frame().rename(columns={0: 'record_count'}))