In [1]:

%pprint
import sys
# Put the sibling ../py folder on the module search path (presumably where the FRVRS package
# imported in the next cell lives -- confirm); the membership test keeps a re-run of this
# cell from inserting the same entry a second time
if ('../py' not in sys.path): sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

from FRVRS import fu, nu
from pandas import DataFrame, Series, concat, notnull, Index, to_datetime
import json
import numpy as np
import os
import os.path as osp
import pandas as pd
import re


# Dataset Built for Metrics Evaluation Open World

In [3]:

# Get all the Open World logs into one data frame
# (start from an empty frame; the ingestion loop later in this cell adds every participant's logs to it)
metrics_evaluation_open_world_df = DataFrame([])
def get_directory_paths(participant_id, logs_path='../data/logs/Metrics Evaluation Open World'):
    """
    List the sub-directories of logs_path whose names end with the participant ID.
    
    Parameters:
        participant_id: the ID to look for; it is converted with str() before matching.
        logs_path (str): the folder to search; only its immediate children are examined.
    
    Returns:
        list of str: the matching directory names (not full paths), sorted alphabetically
        so that the ingestion order does not depend on the file system.
    
    Raises:
        FileNotFoundError: if logs_path does not exist (raised by os.listdir).
    """
    id_suffix = str(participant_id)
    
    # os.listdir also returns plain files, so keep only the entries that really are directories
    participant_dirs_list = sorted(
        dir_name for dir_name in os.listdir(logs_path)
        if dir_name.endswith(id_suffix) and osp.isdir(osp.join(logs_path, dir_name))
    )

    return participant_dirs_list
# Ingest every participant's session folders found under the delivery folder
logs_path = '../data/logs/Human_Sim_Metrics_Data 4-12-2024'

# Collect the per-folder frames in a list and concatenate them once after the loop;
# concatenating inside the loop re-copies all the accumulated rows on every pass
logs_dfs_list = []
for participant_id in range(2_024_201, 2_024_223+1):
    for dir_name in get_directory_paths(participant_id, logs_path):
        
        # Read the folder's CSVs into one data frame and tag the rows with the participant
        folder_path = osp.join(logs_path, dir_name)
        logs_df = fu.concatonate_logs(logs_folder=folder_path)
        logs_df['participant_id'] = participant_id
        
        # Remove numerically-named columns; copy so the assignments below write to an independent frame
        # NOTE(review): the pattern drops ANY column whose name contains a digit, not only all-digit names -- confirm
        columns_list = [x for x in logs_df.columns if not re.search(r'\d+', str(x))]
        logs_df = logs_df[columns_list].copy()
        
        # Convert 'TRUE' and 'FALSE' to boolean values (Series.map turns any value outside the mapping into NaN)
        for cn in fu.boolean_columns_list:
            logs_df[cn] = logs_df[cn].map({'TRUE': True, 'FALSE': False, 'True': True, 'False': False})
        
        # Convert the nulls into NaNs
        for cn in logs_df.columns: logs_df[cn] = logs_df[cn].replace('null', np.nan)
        
        # Keep the cleaned frame for the single concatenation below
        logs_dfs_list.append(logs_df)

# Stack everything onto the (empty) seed frame in one operation; keeping the seed in the
# list means the call still succeeds when no folder matched
metrics_evaluation_open_world_df = pd.concat([metrics_evaluation_open_world_df, *logs_dfs_list], axis='index')

metrics_evaluation_open_world_df = metrics_evaluation_open_world_df.reset_index(drop=True)

# The CSV file name is the last '/'-separated piece of the file_name column
metrics_evaluation_open_world_df['csv_file_name'] = metrics_evaluation_open_world_df.file_name.map(lambda x: str(x).split('/')[-1])

# Persist the ingested frame (the recorded output shows a pickle and a CSV being written)
nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
print(metrics_evaluation_open_world_df.participant_id.nunique()) # 22
print(metrics_evaluation_open_world_df.shape) # (171766, 112)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
22
(171766, 112)



## Check for proper ingestion (duplicate file ingestion, et al)

In [4]:

# Check you even ingested anything
column_count = metrics_evaluation_open_world_df.shape[1]
assert column_count > 4, "Nothing ingested"

# Show how many records each logger version contributed
version_counts_df = metrics_evaluation_open_world_df.groupby('logger_version').size().to_frame('record_count')
display(version_counts_df) # 171766

Unnamed: 0_level_0,record_count
logger_version,Unnamed: 1_level_1
1.4,171766


In [5]:

# Flag every row whose session_uuid is associated with more than one distinct file_name;
# any flagged row means the same session was ingested from two different files
files_per_session_srs = metrics_evaluation_open_world_df.groupby('session_uuid').file_name.transform('nunique')
mask_series = files_per_session_srs.gt(1)
assert not mask_series.any(), "You have duplicate files"
# columns_list = ['session_uuid', 'file_name']
# for (session_uuid, file_name), df in metrics_evaluation_open_world_df[mask_series][columns_list].drop_duplicates().sort_values(columns_list).groupby(
#     columns_list
# ):
#     if not file_name.startswith('Double runs removed/'):
#         file_path = osp.join(fu.data_logs_folder, *file_name.split('/'))
#         os.remove(file_path)

In [6]:

# Check that all your junk scenes are the last scenes
display(metrics_evaluation_open_world_df.groupby('is_scene_aborted').size().to_frame().rename(columns={0: 'record_count'}))

# Look up each session's highest scene ID once, instead of rescanning the whole frame for every aborted scene
max_scene_id_srs = metrics_evaluation_open_world_df.groupby('session_uuid').scene_id.max()

# Every aborted (session, scene) pair must be the last scene of its session
# (the tuple unpacking assumes fu.scene_groupby_columns is the session_uuid/scene_id pair -- confirm in fu)
mask_series = metrics_evaluation_open_world_df.is_scene_aborted
for (session_uuid, scene_id), scene_df in metrics_evaluation_open_world_df[mask_series].groupby(fu.scene_groupby_columns):
    max_scene_id = max_scene_id_srs.loc[session_uuid]
    assert max_scene_id == scene_id, "You've got junk scenes in strange places"

Unnamed: 0_level_0,record_count
is_scene_aborted,Unnamed: 1_level_1
False,170201
True,1565



## Add new features according to your increasing domain knowledge


### Modalize separate columns into one

In [7]:

# Modalize into one patient ID column if possible
new_column_name = 'patient_id'
is_column_missing = new_column_name not in metrics_evaluation_open_world_df.columns
if is_column_missing:
    
    # nu.modalize_columns hands back the frame with the new column added (the recorded shape grows by one column)
    metrics_evaluation_open_world_df = nu.modalize_columns(
        metrics_evaluation_open_world_df, fu.patient_id_columns_list, new_column_name
    )
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    print(metrics_evaluation_open_world_df.shape) # (171766, 113)

# Count the distinct patient IDs, then list the five with the most records
# NOTE(review): the recorded output lists both 'Patient U Root' and 'patient U Root' -- confirm whether they are the same patient
print(metrics_evaluation_open_world_df[new_column_name].nunique()) # 39
record_counts_df = metrics_evaluation_open_world_df.groupby(new_column_name).size().to_frame('record_count')
display(record_counts_df.sort_values('record_count', ascending=False).head(5))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 113)
39


Unnamed: 0_level_0,record_count
patient_id,Unnamed: 1_level_1
Patient V Root,3062
Patient U Root,2726
Open World Marine 1 Female Root,2359
patient U Root,1757
Local Soldier 1 Root,1359


In [8]:

# Modalize into one injury ID column if possible
new_column_name = 'injury_id'
is_column_missing = new_column_name not in metrics_evaluation_open_world_df.columns
if is_column_missing:
    
    # nu.modalize_columns hands back the frame with the new column added (the recorded shape grows by one column)
    metrics_evaluation_open_world_df = nu.modalize_columns(
        metrics_evaluation_open_world_df, fu.injury_id_columns_list, new_column_name
    )
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    print(metrics_evaluation_open_world_df.shape) # (171766, 114)

# Count the distinct injury IDs, then list the five with the most records
print(metrics_evaluation_open_world_df[new_column_name].nunique()) # 34
record_counts_df = metrics_evaluation_open_world_df.groupby(new_column_name).size().to_frame('record_count')
display(record_counts_df.sort_values('record_count', ascending=False).head(5))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 114)
34


Unnamed: 0_level_0,record_count
injury_id,Unnamed: 1_level_1
L Leg Broken,151
R Forearm Burn,131
L Shoulder Broken,116
R Shoulder Puncture,112
L Bicep Puncture,109


In [9]:

# Modalize into one location ID column if possible
new_column_name = 'location_id'
is_column_missing = new_column_name not in metrics_evaluation_open_world_df.columns
if is_column_missing:
    
    # nu.modalize_columns hands back the frame with the new column added (the recorded shape grows by one column)
    metrics_evaluation_open_world_df = nu.modalize_columns(
        metrics_evaluation_open_world_df, fu.location_id_columns_list, new_column_name
    )
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    print(metrics_evaluation_open_world_df.shape) # (171766, 115)

# Count the distinct locations, then list the five with the most records
print(metrics_evaluation_open_world_df[new_column_name].nunique()) # 9284
record_counts_df = metrics_evaluation_open_world_df.groupby(new_column_name).size().to_frame('record_count')
display(record_counts_df.sort_values('record_count', ascending=False).head(5))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 115)
9284


Unnamed: 0_level_0,record_count
location_id,Unnamed: 1_level_1
"(0.0, 0.0, 0.0)",2320
"(15.1, 0.0, -27.9)",2102
"(-19.6, 0.0, -10.0)",1916
"(-3.3, 0.0, 0.0)",1614
"(-22.5, 0.0, -10.7)",1546


In [10]:

# Modalize into one patient sort column if possible
new_column_name = 'patient_sort'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.sort_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.sort_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.sort_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 116)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and categories without records should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 116)


Unnamed: 0_level_0,record_count
patient_sort,Unnamed: 1_level_1
still,768
waver,665
walker,336


In [11]:

# Modalize into one patient pulse column if possible
new_column_name = 'patient_pulse'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.pulse_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.pulse_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.pulse_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 117)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and categories without records should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 117)


Unnamed: 0_level_0,record_count
patient_pulse,Unnamed: 1_level_1
none,2
faint,350
fast,732
normal,685


In [12]:

# Modalize into one patient salt column if possible
new_column_name = 'patient_salt'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.salt_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.salt_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.salt_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 118)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and zero-count categories (DEAD in the recorded run) should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 118)


Unnamed: 0_level_0,record_count
patient_salt,Unnamed: 1_level_1
DEAD,0
EXPECTANT,325
IMMEDIATE,578
DELAYED,647
MINIMAL,219


In [13]:

# Modalize into one patient hearing column if possible
new_column_name = 'patient_hearing'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.hearing_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.hearing_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.hearing_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 119)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and zero-count categories (none in the recorded run) should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 119)


Unnamed: 0_level_0,record_count
patient_hearing,Unnamed: 1_level_1
none,0
limited,19
normal,1303


In [14]:

# Modalize into one patient breath column if possible
new_column_name = 'patient_breath'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.breath_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.breath_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.breath_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 120)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and zero-count categories (collapsedRight in the recorded run) should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 120)


Unnamed: 0_level_0,record_count
patient_breath,Unnamed: 1_level_1
none,93
collapsedLeft,121
collapsedRight,0
restricted,155
fast,593
normal,1054


In [15]:

# Modalize into one patient mood column if possible
new_column_name = 'patient_mood'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.mood_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.mood_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.mood_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 121)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and zero-count categories (unresponsive in the recorded run) should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 121)


Unnamed: 0_level_0,record_count
patient_mood,Unnamed: 1_level_1
dead,342
unresponsive,0
agony,396
upset,284
calm,197


In [16]:

# Modalize into one patient pose column if possible
new_column_name = 'patient_pose'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.pose_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.pose_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.pose_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 122)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and zero-count categories (recovery in the recorded run) should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 122)


Unnamed: 0_level_0,record_count
patient_pose,Unnamed: 1_level_1
supine,743
fetal,124
sittingGround,220
kneeling,21
recovery,0
standing,111


In [17]:

# Modalize into one injury severity column if possible
new_column_name = 'injury_severity'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.severity_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.severity_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.severity_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 123)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and categories without records should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 123)


Unnamed: 0_level_0,record_count
injury_severity,Unnamed: 1_level_1
high,829
medium,861
low,94


In [18]:

# Modalize into one injury required_procedure column if possible
new_column_name = 'injury_required_procedure'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.required_procedure_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.required_procedure_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.required_procedure_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 124)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and categories without records should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 124)


Unnamed: 0_level_0,record_count
injury_required_procedure,Unnamed: 1_level_1
tourniquet,300
gauzePressure,96
decompress,75
woundpack,281
airway,25
none,129


In [19]:

# Modalize into one injury body_region column if possible
new_column_name = 'injury_body_region'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.body_region_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.body_region_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.body_region_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 125)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and categories without records should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 125)


Unnamed: 0_level_0,record_count
injury_body_region,Unnamed: 1_level_1
head,47
neck,48
chest,306
abdomen,101
leftLeg,267
rightLeg,275
rightArm,297
leftArm,392


In [20]:

# Modalize into one tool type column if possible
new_column_name = 'tool_type'
if (new_column_name not in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df = nu.modalize_columns(metrics_evaluation_open_world_df, fu.tool_type_columns_list, new_column_name)
    
    # Cast to the project's dtype for this column (presumably an ordered categorical -- see fu.tool_type_category_order)
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.tool_type_category_order)
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    print(metrics_evaluation_open_world_df.shape) # (171766, 126)

# Count the records per category; observed=False is passed explicitly because pandas is changing the
# groupby default for categorical keys, and categories without records should stay in the table
display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 126)


Unnamed: 0_level_0,record_count
tool_type,Unnamed: 1_level_1
Tourniquet,5846
Gauze_Pack,183
Needle,11150
Naso,60
Nasal Airway,5233
Gauze_Dressing,168



### Convert text columns to categorical

In [21]:

# Convert the pulse name column to the project's dtype for it, if the column was logged
new_column_name = 'pulse_taken_pulse_name'
if (new_column_name in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.pulse_name_category_order)
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    # Count the records per category inside the guard, so a missing column no longer raises a KeyError here;
    # observed=False is explicit so that categories without records stay in the table
    display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))
else:
    print(f'{new_column_name} is not in the data frame, so there is nothing to convert')

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv


Unnamed: 0_level_0,record_count
pulse_taken_pulse_name,Unnamed: 1_level_1
pulse_none,12
pulse_faint,391
pulse_fast,480
pulse_normal,249


In [22]:

# Convert the tool applied data column to the project's dtype for it, if the column was logged
new_column_name = 'tool_applied_data'
if (new_column_name in metrics_evaluation_open_world_df.columns):
    metrics_evaluation_open_world_df[new_column_name] = metrics_evaluation_open_world_df[new_column_name].astype(fu.tool_data_category_order)
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    
    # Count the records per category inside the guard, so a missing column no longer raises a KeyError here;
    # observed=False is explicit so that categories without records (both underarm sites in the recorded run) stay in the table
    display(metrics_evaluation_open_world_df.groupby(new_column_name, observed=False).size().to_frame().rename(columns={0: 'record_count'}))
else:
    print(f'{new_column_name} is not in the data frame, so there is nothing to convert')

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv


Unnamed: 0_level_0,record_count
tool_applied_data,Unnamed: 1_level_1
right_chest,9
left_chest,35
right_underarm,0
left_underarm,0



## Mask PII

In [23]:

# Mask voice capture PII. OSU screened all of the **VOICE_COMMAND** and **VOICE_CAPTURE** lines and
# replaced any names with either Max or Jane, regardless of whether the name was that of the responder.
# But, just to make sure...
columns_list = ['voice_command_command_description', 'voice_capture_message']

# Only run the named-entity pass when neither column already contains the '[PERSON]' placeholder
# (a literal substring search per column, which does not rely on DataFrame.applymap)
has_placeholder = any(
    metrics_evaluation_open_world_df[cn].dropna().astype(str).str.contains('[PERSON]', regex=False).any()
    for cn in columns_list
)
if not has_placeholder:
    import spacy
    try: nlp = spacy.load('en_core_web_sm')
    except OSError as e:
        
        # The model is not installed: download it with this kernel's interpreter, then load it
        print(str(e).strip())
        import subprocess
        command_list = [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm', '--quiet']
        print(' '.join(command_list))
        
        # An argument list (no shell) keeps an interpreter path that contains spaces in one piece
        subprocess.run(command_list, check=True)
        nlp = spacy.load('en_core_web_sm')
    
    # Work only on the rows that have text in at least one of the two voice columns
    mask_series = metrics_evaluation_open_world_df.voice_command_command_description.isnull() & metrics_evaluation_open_world_df.voice_capture_message.isnull()
    df = metrics_evaluation_open_world_df.loc[~mask_series, columns_list]
    def mask_pii(srs):
        """
        Replace every PERSON entity that spaCy finds in the row's voice columns with '[PERSON]'.
        
        Parameters:
            srs (Series): one row holding the columns named in columns_list; null entries are left alone.
        
        Returns:
            Series: the same row with the masked text written back into it.
        """
        for idx in columns_list:
            new_text = srs[idx]
            if notnull(new_text):
                doc = nlp(new_text)
                for entity in doc.ents:
                    if entity.label_ == 'PERSON':
                        
                        # Escape the entity text so characters such as '.' or '(' are matched literally
                        # instead of being read as regular expression syntax
                        new_text = re.sub('\\b' + re.escape(entity.text) + '\\b', '[PERSON]', new_text)
                srs[idx] = new_text
    
        return srs
    
    # Write the masked text back one column at a time; only non-null results are assigned
    masked_df = df.apply(mask_pii, axis='columns')
    for column_name in columns_list:
        masked_srs = masked_df[column_name].dropna()
        metrics_evaluation_open_world_df.loc[masked_srs.index, column_name] = masked_srs
    
    # Store the results and show the new data frame shape
    nu.store_objects(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    nu.save_data_frames(metrics_evaluation_open_world_df=metrics_evaluation_open_world_df)
    print(metrics_evaluation_open_world_df.shape) # (171766, 126)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/metrics_evaluation_open_world_df.csv
(171766, 126)


In [24]:

# NOTE(review): a bare `raise` with no active exception fails with
# "RuntimeError: No active exception to reraise" (see the recorded output); presumably it is a
# deliberate stop so that "Run All" halts here -- confirm, or remove it before sharing the notebook
raise

RuntimeError: No active exception to reraise


Conduct some exploratory analysis of the open world segments for the ITM scenarios from the Metrics Evaluation.
For context, the results of these analyses are a goal for the 4/30 results meeting (stretch) or the PI meeting (more likely).

<h2>I want to ask the question: which factors contribute to the variance in these outcomes?</h2>
Conceptually, I want an exploratory factor analysis using these IVs and DVs. But I suspect we don’t have enough data for that, so let’s get creative and get as close to it as we can.
My thought was to keep the environments separate because each participant did 2 of the environments so if we use 1 to explore, we can use the other to confirm. But again, I recognize we do not have power to do these properly.

<h2>Here is my initial list but I am open to suggestions and modifications</h2>
<h3>IVs (these are not available in the csv; we are working on calculating them now and can get you that info.)</h3>
<ul>
    <li>Participant medical role</li>
    <li>Years of experience</li>
    <li>ST alignment score (continuous or group assignment)</li>
    <li>AD alignment score (continuous or group assignment)</li>
</ul>
<h3>DVs</h3>
<ul>
    <li>Total number of actions</li>
    <li>Count of assessment actions</li>
    <li>Count of treatment actions</li>
    <li>Count of tags applied</li>
    <li>Order of patients engaged</li>
    <li>Tag color for each patient</li>
    <li>Treat expectant patient (yes/no)</li>
    <li>Triage efficiency</li>
    <li>Time to hemorrhage control</li>
</ul>

<h2>The csv files are available: https://nextcentury.atlassian.net/wiki/x/IYDJsgand  and are labeled: </h2>
<ul>
    <li>ITM 3.13.2024.zip; </li>
    <li>ITM 3.14.2024 405F.zip; </li>
    <li>ITM 3.14.2024 405E.zip;</li>
    <li>ITM 3.20.2024 405F.zip; </li>
    <li>ITM 3.20.2024 405E.zip;</li>
    <li>ITM 3.22.2024</li>
</ul>
<h2>In creating this dataset:</h2>
<ul>
    <li>Please keep these data segregated from all others before and after and label it “Metrics Evaluation Open World”</li>
    <li>Please keep the environments labeled: whether it is Jungle, Desert, Submarine, or Urban</li>
</ul>
<h2>We only want to use data from the following characters within each csv (by environment):</h2>
<h3>Desert:</h3>
<ul>
    <li>Open World Marine 1 Female</li>
    <li>Open World Marine 2 Male</li>
    <li>Open World Civilian 1 Male</li>
    <li>Open World Civilian 2 Female</li>
</ul>
<h3>Jungle:</h3>
<ul>
    <li>Open World Marine 1 Male</li>
    <li>Open World Marine 2 Female</li>
    <li>Open World Marine 3 Male</li>
    <li>Open World Marine 4 Male</li>
</ul>
<h3>Submarine:</h3>
<ul>
    <li>Navy Soldier 1 Male</li>
    <li>Navy Soldier 2 Male</li>
    <li>Navy Soldier 3 Male</li>
    <li>Navy Soldier 4 Female</li>
</ul>
<h3>Urban:</h3>
<ul>
    <li>Marine 1 Male</li>
    <li>Marine 2 Male</li>
    <li>Marine 3 Male</li>
    <li>Marine 4 Male</li>
    <li>Civilian 1 Female</li>
</ul>