In [1]:

%pprint
import sys
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

from frvrs_utils import FRVRSUtilities
from notebook_utils import NotebookUtilities
import os
import os.path as osp
import pandas as pd
import re

# Both utility objects are pointed at the same data and saves folders,
# so resolve each absolute path once and hand the same keyword arguments to both
folder_kwargs = dict(
    data_folder_path=osp.abspath('../data'),
    saves_folder_path=osp.abspath('../saves'),
)
nu = NotebookUtilities(**folder_kwargs)
fu = FRVRSUtilities(**folder_kwargs)


# Find Negative Metrics in Jeremy's DCEMS Data


Look for negative values of the “time patient engaged to time tool applied (tourniquet) for that patient” metric for the first hemorrhage control.

In [3]:

# Read the CSV logs from the DCEMS hemorrhage-control folder into one data frame
# (the folder is passed as a relative path, exactly as before)
logs_folder = '../data/hem_control_dcems'
dcems_logs_df = fu.concatonate_logs(logs_folder=logs_folder)

In [4]:

# Remove numerically-named columns, i.e. columns whose whole label is a number
# (presumably leftover positional labels from the raw CSVs - TODO confirm).
# re.fullmatch is used so that descriptive column names which merely contain
# a digit are kept; re.search(r'\d+', ...) would have dropped those as well.
columns_list = [cn for cn in dcems_logs_df.columns if not re.fullmatch(r'\d+', str(cn))]
dcems_logs_df = dcems_logs_df[columns_list]

In [5]:

# Convert 'TRUE' and 'FALSE' to boolean values.
# Series.map replaces every value that is missing from the mapping with NaN, so real
# booleans are mapped to themselves; without those two entries, re-running this cell
# (or loading data that is already boolean) would silently blank the columns.
# NOTE: True == 1 and False == 0 in Python, so numeric 1/0 values are converted as well.
boolean_map = {
    'TRUE': True, 'FALSE': False,
    'True': True, 'False': False,
    True: True, False: False,
}
for cn in [
    'injury_record_injury_treated_with_wrong_treatment', 'injury_record_injury_treated',
    'injury_treated_injury_treated_with_wrong_treatment', 'injury_treated_injury_treated'
]:
    dcems_logs_df[cn] = dcems_logs_df[cn].map(boolean_map)

In [6]:

# Check for duplicate file ingestion: count the distinct file names behind each
# session UUID and fail if any session was loaded from more than one file
files_per_session_srs = dcems_logs_df.groupby('session_uuid').file_name.transform(pd.Series.nunique)
mask_series = (files_per_session_srs > 1)
duplicate_rows_count = dcems_logs_df[mask_series].shape[0]
assert duplicate_rows_count == 0, "You have duplicate files"

In [7]:

# Modalize into one patient ID column if possible
# (skipped when the data frame already has a patient_id column)
new_column_name = 'patient_id'
if new_column_name not in dcems_logs_df.columns:

    # The action-specific columns handed to modalize_columns, in their original order
    columns_list = [
        'patient_demoted_id',
        'patient_record_id',
        'injury_record_patient_id',
        's_a_l_t_walk_if_can_patient_id',
        's_a_l_t_walked_patient_id',
        's_a_l_t_wave_if_can_patient_id',
        's_a_l_t_waved_patient_id',
        'patient_engaged_id',
        'pulse_taken_patient_id',
        'injury_treated_patient_id',
        'tool_applied_patient_id',
        'tag_applied_patient_id',
        'player_gaze_patient_id',
    ]
    dcems_logs_df = nu.modalize_columns(dcems_logs_df, columns_list, new_column_name)

In [8]:

# Modalize into one location ID column if possible
# Skipped when the data frame already has a location_id column
new_column_name = 'location_id'
if new_column_name not in dcems_logs_df.columns:

    # The action-specific columns handed to modalize_columns, in their original order
    columns_list = [
        'teleport_location',
        'patient_demoted_position',
        'patient_record_position',
        'injury_record_injury_injury_locator',
        's_a_l_t_walk_if_can_sort_location',
        's_a_l_t_walked_sort_location',
        's_a_l_t_wave_if_can_sort_location',
        's_a_l_t_waved_sort_location',
        'patient_engaged_position',
        'bag_access_location',
        'injury_treated_injury_injury_locator',
        'bag_closed_location',
        'tag_discarded_location',
        'tool_discarded_location',
        'player_location_location',
        'player_gaze_location',
    ]
    dcems_logs_df = nu.modalize_columns(dcems_logs_df, columns_list, new_column_name)

In [9]:

# Modalize into one injury ID column if possible
# Skipped when the data frame already has an injury_id column
new_column_name = 'injury_id'
if new_column_name not in dcems_logs_df.columns:
    injury_id_columns = ['injury_record_id', 'injury_treated_id']
    dcems_logs_df = nu.modalize_columns(dcems_logs_df, injury_id_columns, new_column_name)

In [10]:

# Modalize into one patient sort column if possible
# Skipped when the data frame already has a patient_sort column
new_column_name = 'patient_sort'
if new_column_name not in dcems_logs_df.columns:
    patient_sort_columns = ['patient_demoted_sort', 'patient_record_sort', 'patient_engaged_sort']
    dcems_logs_df = nu.modalize_columns(dcems_logs_df, patient_sort_columns, new_column_name)

In [11]:

# Any runs longer than 16 minutes are probably an instance
# of someone taking off the headset and setting it on the ground.
# 1 second = 1,000 milliseconds; 1 minute = 60 seconds
# (the threshold itself is applied inside fu.get_is_scene_aborted)
new_column_name = 'is_scene_aborted'
if (new_column_name not in dcems_logs_df.columns):
    dcems_logs_df[new_column_name] = False
    for scene_key, scene_df in dcems_logs_df.groupby(fu.scene_groupby_columns):

        # Pair each groupby column with its value in the group key. The previous eval(cn)
        # lookup only worked while the loop variables were named exactly like the columns.
        mask_series = True
        for cn, cv in zip(fu.scene_groupby_columns, scene_key): mask_series &= (dcems_logs_df[cn] == cv)

        # Stamp the scene-level result onto every row of that scene
        dcems_logs_df.loc[mask_series, new_column_name] = fu.get_is_scene_aborted(scene_df)

In [12]:

# Check if all the patient IDs in any run are some variant of Mike and designate those runs as "Orientation"
column_value = 'Orientation'

# Every row starts out as a triage scene when no scene type has been recorded yet
if 'scene_type' not in dcems_logs_df.columns:
    dcems_logs_df['scene_type'] = 'Triage'

# Only flag orientation scenes when none have been flagged so far
orientation_already_flagged = column_value in dcems_logs_df.scene_type.unique()
if not orientation_already_flagged:

    def _all_ids_mention_mike(srs):
        """Return True when every patient ID in the scene contains 'mike' (case-insensitive)."""
        return all(srs.str.lower().str.contains('mike'))

    # Broadcast the per-scene verdict back onto every row of the scene and mark those rows
    scene_gb = dcems_logs_df.groupby(fu.scene_groupby_columns)
    base_mask_series = scene_gb.patient_id.transform(_all_ids_mention_mike)
    dcems_logs_df.loc[base_mask_series, 'scene_type'] = column_value

In [14]:

# Get a sample with a clear count of responders:
# flag every row of a file with that file's fu.get_is_a_one_triage_file result
new_column_name = 'is_a_one_triage_file'
if new_column_name not in dcems_logs_df.columns:
    dcems_logs_df[new_column_name] = False
    for log_file_name in dcems_logs_df.file_name.unique():

        # Evaluate the file first, then write the verdict onto all of its rows
        file_flag = fu.get_is_a_one_triage_file(dcems_logs_df, log_file_name)
        dcems_logs_df.loc[dcems_logs_df.file_name == log_file_name, new_column_name] = file_flag

In [15]:

# Report the (row count, column count) of the assembled logs data frame
print(dcems_logs_df.shape)

(108593, 106)


In [41]:

# Group the tick-ordered log rows by patient and collect the patients that have more than
# one hemorrhage-control injury record but no first patient interaction
gb = dcems_logs_df.sort_values(['action_tick']).groupby(fu.patient_groupby_columns)
double_bleeders_list = []
for (session_uuid, scene_id, patient_id), patient_df in gb:

    # Only patients with more than one injury record needing hemorrhage control are of interest
    record_mask_series = patient_df.injury_record_required_procedure.isin(fu.hemorrhage_control_procedures_list)
    if (record_mask_series.sum() <= 1): continue

    # Ticks at which an injury needing hemorrhage control was treated
    treated_mask_series = patient_df.injury_treated_required_procedure.isin(fu.hemorrhage_control_procedures_list)
    action_ticks_list = patient_df[treated_mask_series].action_tick.tolist()

    engagement_start = fu.get_first_patient_interaction(patient_df)
    if pd.isnull(engagement_start):

        # No interaction was found for this patient; remember it for inspection
        key_tuple = (session_uuid, scene_id, patient_id)
        double_bleeders_list.append(key_tuple)
        continue

    # The first interaction must not come after any of the treatment ticks
    earliest_tick = min([engagement_start] + action_ticks_list)
    assert engagement_start == earliest_tick, "Negative ENGAGEMENT_START"

In [43]:

# Show how many patients were collected next to the total number of patient groups
len(double_bleeders_list), len(gb.groups)

(19, 901)

In [49]:

import random

# Inspect the raw log rows of one collected patient. A dedicated, seeded generator keeps
# the displayed patient the same on every run; change the seed to look at a different one.
assert double_bleeders_list, "double_bleeders_list is empty, so there is no patient to inspect"
key_tuple = random.Random(0).choice(double_bleeders_list)

# Select the rows whose patient groupby columns all match the chosen key
patient_mask_series = True
for cn, cv in zip(fu.patient_groupby_columns, key_tuple): patient_mask_series &= (dcems_logs_df[cn] == cv)

# Drop the all-empty columns and transpose so each log row reads as a column
dcems_logs_df[patient_mask_series].dropna(axis='columns', how='all').T

Unnamed: 0,27032,27052,27067,27072
action_type,PATIENT_DEMOTED,PATIENT_RECORD,INJURY_RECORD,INJURY_RECORD
action_tick,3684,12217,12218,12218
event_time,2022-12-06 13:09:17,2022-12-06 13:09:25,2022-12-06 13:09:25,2022-12-06 13:09:25
session_uuid,4dac68bf-b62f-4e3f-af31-f2bb258fe225,4dac68bf-b62f-4e3f-af31-f2bb258fe225,4dac68bf-b62f-4e3f-af31-f2bb258fe225,4dac68bf-b62f-4e3f-af31-f2bb258fe225
file_name,/4dac68bf-b62f-4e3f-af31-f2bb258fe225.csv,/4dac68bf-b62f-4e3f-af31-f2bb258fe225.csv,/4dac68bf-b62f-4e3f-af31-f2bb258fe225.csv,/4dac68bf-b62f-4e3f-af31-f2bb258fe225.csv
logger_version,1.0,1.0,1.0,1.0
patient_demoted_health_level,100,,,
patient_demoted_health_time_remaining,Infinity,,,
patient_demoted_id,Helga_0 Root,,,
patient_demoted_position,"(-0.2, 0.0, 7.8)",,,
