In [1]:

%pprint
import sys
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

import os
from pandas import DataFrame
import pandas as pd
from notebook_utils import NotebookUtilities
import matplotlib.pyplot as plt
import re
import humanize
from datetime import timedelta

nu = NotebookUtilities(data_folder_path=os.path.abspath('../data'))

In [3]:

# Load the previously pickled logs data frame, if one is available, and print its shape.
# NOTE(review): no CSV files are read in this cell; when the pickle is absent this cell does not
# define frvrs_logs_df, so any later cell that needs it will fail with a NameError.
if nu.pickle_exists('frvrs_logs_df'):
    frvrs_logs_df = nu.load_object('frvrs_logs_df')
    print(frvrs_logs_df.shape)
    # Optional peek at a few random rows (disabled):
    # df = frvrs_logs_df.sample(4).dropna(axis='columns', how='all')
    # display(df.T)

(832366, 110)



# Time Spent on Task

Are all mass casualty tasks (wound packing, needle decompression, tourniquet application, triage tag determination) similar in difficulty based on time spent?

In [4]:

# Get time delta for each action type: one row per logged action on a patient, holding the time that
# elapsed since the previous action on that same patient (or since the patient's first log entry)
if nu.pickle_exists('time_spent_df'):
    time_spent_df = nu.load_object('time_spent_df')
else:

    # Define lists of action types
    action_types_list = [
        'PULSE_TAKEN', 'INJURY_TREATED', 'TOOL_APPLIED', 'TAG_APPLIED'
    ]

    # Define columns for grouping
    groupby_columns = ['session_uuid', 'scene_index', 'patient_id']

    # Define the injury columns that are copied onto every INJURY_TREATED row
    injury_columns = [
        'injury_treated_id', 'injury_treated_required_procedure', 'injury_treated_severity',
        'injury_treated_body_region'
    ]

    # Group by patient ID
    gb = frvrs_logs_df.sort_values(['elapsed_time']).groupby(groupby_columns)
    rows_list = []
    for (session_uuid, scene_index, patient_id), df1 in gb:

        # Keep the group key as a tuple so it can be paired with groupby_columns without eval()
        group_values = (session_uuid, scene_index, patient_id)

        # Get the logger version; .item() raises if the group holds more than one distinct value
        logger_version = df1.logger_version.unique().item()

        # Get the first time the patient is noticed
        first_notice = df1.elapsed_time.min()
        previous_time = first_notice

        # Get the actions with the patient; group by elapsed time first to get the sort order correct
        mask_series = (df1.action_type.isin(action_types_list))
        for (elapsed_time, action_type), df2 in df1[mask_series].sort_values('elapsed_time').groupby(
            ['elapsed_time', 'action_type']
        ):

            # Work out the action-specific fields; INJURY_TREATED yields one row per logged injury,
            # every other action type yields a single row for the (elapsed_time, action_type) pair
            if(action_type == 'INJURY_TREATED'):
                extras_list = [
                    dict(zip(injury_columns, injury_values))
                    for injury_values in zip(*(df2[injury_cn] for injury_cn in injury_columns))
                ]
            elif(action_type == 'TAG_APPLIED'):
                # NOTE(review): squeeze() only gives a scalar when df2 has exactly one row
                extras_list = [{'tag_applied_type': df2.tag_applied_type.squeeze()}]
            elif(action_type == 'TOOL_APPLIED'):
                # NOTE(review): squeeze() only gives a scalar when df2 has exactly one row
                extras_list = [{'tool_applied_type': df2.tool_applied_type.squeeze()}]
            else:
                extras_list = [{}]

            for extras_dict in extras_list:
                row_dict = {}
                row_dict['logger_version'] = logger_version

                # Add the groupby columns to the row dictionary
                row_dict.update(zip(groupby_columns, group_values))

                # Add the time spent for each action
                row_dict['first_notice'] = first_notice
                row_dict['action_type'] = action_type
                row_dict.update(extras_dict)
                row_dict['previous_time'] = previous_time
                row_dict['action_time'] = elapsed_time
                row_dict['time_spent'] = elapsed_time - previous_time

                # The next action on this patient is measured from this one
                previous_time = elapsed_time

                # Add the row dictionary to the list
                rows_list.append(row_dict)

    # Create a data frame from the list of row dictionaries
    time_spent_df = DataFrame(rows_list)
    # nu.store_objects(time_spent_df=time_spent_df)

In [91]:

# Show up to four randomly chosen rows that have an injury_treated_id, transposed for readability;
# columns that are entirely NaN within the sample are left out
mask_series = time_spent_df.injury_treated_id.notnull()
df = time_spent_df.loc[mask_series]
display(
    df.sample(min(4, df.shape[0]))
    .dropna(axis='columns', how='all')
    .T
)

Unnamed: 0,1061,16501,10388,7759
logger_version,1.3,1.0,1.3,1.3
session_uuid,0786a1df-d010-4b1b-a99a-e00df486d479,a0987257-801e-44c5-a1ad-81e0083bfa46,61dd1584-76e8-4536-9aeb-15276e646ff4,43313faf-6031-4433-be70-d5d828f55cff
scene_index,1,8,2,1
patient_id,Lily_2 Root,Gary_1 Root,Mike_7 Root,Mike_1 Root
first_notice,21828,3248915,372681,129343
action_type,INJURY_TREATED,INJURY_TREATED,INJURY_TREATED,INJURY_TREATED
injury_treated_id,R Shin Amputation,Face Shrapnel,R Calf Laceration,L Thigh Laceration
injury_treated_required_procedure,tourniquet,airway,gauzePressure,tourniquet
injury_treated_severity,high,high,low,medium
injury_treated_body_region,rightLeg,head,rightLeg,leftLeg


In [112]:

def get_mean_time_spent(column_name, time_df=None):
    """
    Compute the mean time spent for each distinct value of a column.

    Parameters:
        column_name (str): Column to group by. Rows where this column is
            null are excluded from the result.
        time_df (DataFrame, optional): Frame holding ``column_name`` and a
            numeric ``time_spent`` column, which is interpreted as
            milliseconds when formatted. Defaults to the notebook-level
            ``time_spent_df``.

    Returns:
        DataFrame: One row per distinct value, with columns
        ``[column_name, 'mean_time_spent']``, ordered from the smallest to
        the largest numeric mean. The mean is rendered as text by
        ``humanize.precisedelta``.
    """
    if time_df is None:
        time_df = time_spent_df

    # Drop null keys explicitly; groupby would skip them anyway, so a per-group filter pass is not needed
    mean_series = time_df.dropna(subset=[column_name]).groupby([column_name]).time_spent.mean()

    # Sort while the values are still numeric, then convert them to human-readable durations
    readable_series = mean_series.sort_values().map(
        lambda x: humanize.precisedelta(timedelta(milliseconds=x))
    )

    return readable_series.reset_index().rename(columns={'time_spent': 'mean_time_spent'})

In [117]:

# Get mean time from last action for each body region
get_mean_time_spent('injury_treated_body_region')

Unnamed: 0,injury_treated_body_region,mean_time_spent
0,neck,28.90 seconds
1,head,54.68 seconds
2,abdomen,1 minute and 0.10 seconds
3,chest,1 minute and 0.23 seconds
4,rightArm,1 minute and 10.21 seconds
5,rightLeg,1 minute and 30.23 seconds
6,leftLeg,1 minute and 32.56 seconds
7,leftArm,1 minute and 51.83 seconds


In [116]:

# Get mean time from last action for each treatment severity
get_mean_time_spent('injury_treated_severity')

Unnamed: 0,injury_treated_severity,mean_time_spent
0,high,1 minute and 8.53 seconds
1,low,1 minute and 18.67 seconds
2,medium,1 minute and 26.66 seconds


In [118]:

# Get mean time from last action for each treatment procedure
get_mean_time_spent('injury_treated_required_procedure')

Unnamed: 0,injury_treated_required_procedure,mean_time_spent
0,none,10.66 seconds
1,airway,39.90 seconds
2,decompress,41.61 seconds
3,woundpack,1 minute and 10.88 seconds
4,gauzePressure,1 minute and 18.30 seconds
5,tourniquet,1 minute and 42.34 seconds


In [123]:

# Get mean time from last action for each injury type, quickest
get_mean_time_spent('injury_treated_id').head(5)

Unnamed: 0,injury_treated_id,mean_time_spent
0,L Chest Collapse,7.31 seconds
1,R Bicep Puncture,15.07 seconds
2,L Bicep Puncture,18.41 seconds
3,L Stomach Puncture,24.58 seconds
4,R Side Puncture,28.67 seconds


In [122]:

# Get mean time from last action for each injury type, slowest
get_mean_time_spent('injury_treated_id').tail(5)

Unnamed: 0,injury_treated_id,mean_time_spent
19,R Shin Amputation,1 minute and 53.55 seconds
20,L Forearm Laceration,1 minute and 54.26 seconds
21,Forehead Scrape,2 minutes and 3.53 seconds
22,R Shoulder Puncture,2 minutes and 13.40 seconds
23,R Wrist Amputation,2 minutes and 35.27 seconds


In [67]:

# Get mean time from last action for each action type
get_mean_time_spent('action_type')

Unnamed: 0,action_type,time_spent
0,TOOL_APPLIED,12.28 seconds
1,PULSE_TAKEN,48.06 seconds
2,TAG_APPLIED,48.24 seconds
3,INJURY_TREATED,1 minute and 20.55 seconds


In [68]:

# Get mean time from last action for each tool type
get_mean_time_spent('tool_applied_type')

Unnamed: 0,tool_applied_type,time_spent
0,Gauze_Pack,6.49 seconds
1,Tourniquet,10.26 seconds
2,Gauze_Dressing,20.22 seconds
3,Naso,30.91 seconds
4,Needle,51.14 seconds


In [69]:

# Get mean time from last action for each tag type
get_mean_time_spent('tag_applied_type')

Unnamed: 0,tag_applied_type,time_spent
0,red,19.95 seconds
1,yellow,35.71 seconds
2,black,37.00 seconds
3,gray,46.63 seconds
4,green,1 minute and 56.26 seconds


In [124]:

def get_patient_variability(columns_dict, column_name, time_df=None):
    """
    Build a per-patient table of the standard deviation of time spent.

    Parameters:
        columns_dict (dict): Maps a value found in ``column_name`` to the name
            of the output column that will hold the variability for that value.
        column_name (str): Column of the time-spent frame whose values select
            the rows used for each output column.
        time_df (DataFrame, optional): Frame holding ``patient_id``,
            ``time_spent`` (interpreted as milliseconds when formatted) and
            ``column_name``. Defaults to the notebook-level ``time_spent_df``.

    Returns:
        DataFrame: One row per unique ``patient_id``, in order of first
        appearance, plus one text column per entry of ``columns_dict``
        formatted as ``'±<duration>'``. A cell is NaN when the patient has no
        usable standard deviation for that value (for example only one
        observation). The rows are NOT ranked by variability.
    """
    if time_df is None:
        time_df = time_spent_df

    # Start from every patient so that the merges below can never add or drop rows
    variability_df = DataFrame(time_df.patient_id.unique().tolist(), columns=['patient_id'])
    groupby_columns = ['patient_id']
    for column_value, time_spent_rename in columns_dict.items():
        subset_df = time_df[time_df[column_name] == column_value]

        # Leave out patients that have any null time_spent, then take the spread of what remains
        std_series = subset_df.groupby(groupby_columns).filter(
            lambda patient_df: not patient_df.time_spent.isnull().any()
        ).groupby(groupby_columns).time_spent.std()

        # A null standard deviation carries no information, so drop it before formatting
        column_df = std_series.dropna().map(
            lambda x: '±' + humanize.precisedelta(timedelta(milliseconds=x))
        ).rename(time_spent_rename).reset_index()

        # Every key on the right already exists on the left, so a left merge returns the same rows as
        # an outer merge while keeping the row order independent of the installed pandas version
        variability_df = variability_df.merge(column_df, on='patient_id', how='left')

    return variability_df

In [125]:

# Show, per patient, the standard deviation of time spent for each action type.
# NOTE(review): .head(5) simply takes the first five rows of the result; the rows are not ranked by
# variability, so these are not necessarily the patients with the greatest variability.
columns_dict = {
    'TOOL_APPLIED': 'variability_in_applying_tools',
    'PULSE_TAKEN': 'variability_in_taking_pulses',
    'TAG_APPLIED': 'variability_in_applying_tags',
    'INJURY_TREATED': 'variability_in_treating_injuries',
}
get_patient_variability(columns_dict, 'action_type').head(5)

Unnamed: 0,patient_id,variability_in_applying_tools,variability_in_taking_pulses,variability_in_applying_tags,variability_in_treating_injuries
0,Gary_0 Root,±10.49 seconds,±14.04 seconds,±12.53 seconds,±27.85 seconds
1,Bob_0 Root,±1 minute and 33.48 seconds,±1 minute and 43.82 seconds,±1 minute and 30.22 seconds,
2,Bob_9 Root,,±3 minutes and 16.06 seconds,±4 minutes and 4.01 seconds,
3,Gary_1 Root,±1 minute and 18.74 seconds,±1 minute and 21.21 seconds,±1 minute and 7.81 seconds,±58.91 seconds
4,Gary_3 Root,±2 minutes and 10.45 seconds,±1 minute and 9.49 seconds,±40.53 seconds,±1 minute and 11.23 seconds


In [126]:

# Show, per patient, the standard deviation of time spent for each applied tag color.
# NOTE(review): .head(5) simply takes the first five rows of the result; the rows are not ranked by
# variability, so these are not necessarily the patients with the greatest variability.
columns_dict = {
    'red': 'variability_in_applying_red',
    'yellow': 'variability_in_applying_yellow',
    'black': 'variability_in_applying_black',
    'gray': 'variability_in_applying_gray',
    'green': 'variability_in_applying_green',
}
get_patient_variability(columns_dict, 'tag_applied_type').head(5)

Unnamed: 0,patient_id,variability_in_applying_red,variability_in_applying_yellow,variability_in_applying_black,variability_in_applying_gray,variability_in_applying_green
0,Gary_0 Root,±15.08 seconds,±11.84 seconds,±12.72 seconds,±17.84 seconds,±3.68 seconds
1,Bob_0 Root,±2 minutes and 22.97 seconds,,±1 minute and 29.50 seconds,±1 minute and 8.15 seconds,±1 minute and 53.27 seconds
2,Bob_9 Root,,±3 minutes and 27.49 seconds,,±4 minutes and 19.91 seconds,±4 minutes and 13.21 seconds
3,Gary_1 Root,±1 minute and 46.77 seconds,±6.18 seconds,±41.77 seconds,±1 minute and 7.96 seconds,±2.78 seconds
4,Gary_3 Root,±22.53 seconds,±36.77 seconds,±2 minutes and 17.37 seconds,±1 minute and 9.98 seconds,


In [128]:

# Show, per patient, the standard deviation of time spent for each required treatment procedure,
# restricted to patients that have a woundpack value and transposed for readability.
# NOTE(review): .head(4) simply takes the first four matching rows; the rows are not ranked by
# variability, so these are not necessarily the patients with the greatest variability.
columns_dict = {
    'none': 'variability_in_applying_none',
    'airway': 'variability_in_applying_airway',
    'decompress': 'variability_in_applying_decompress',
    'woundpack': 'variability_in_applying_woundpack',
    'gauzePressure': 'variability_in_applying_gauzePressure',
    'tourniquet': 'variability_in_applying_tourniquet',
}
df = get_patient_variability(columns_dict, 'injury_treated_required_procedure')
# Keep only the patients for which a woundpack variability could be computed
mask_series = ~df.variability_in_applying_woundpack.isnull()
df[mask_series].head(4).T

Unnamed: 0,9,10,14,20
patient_id,Lily_4 Root,Mike_5 Root,Mike_2 Root,Helga_0 Root
variability_in_applying_none,,,,
variability_in_applying_airway,,,,
variability_in_applying_decompress,,,,
variability_in_applying_woundpack,±2 minutes and 1.97 seconds,±1 minute and 12.65 seconds,±1 minute and 58.33 seconds,±54.67 seconds
variability_in_applying_gauzePressure,,±2 minutes and 19.81 seconds,,
variability_in_applying_tourniquet,,±2 minutes and 7.87 seconds,,±2 minutes and 1.18 seconds


In [None]:

# TODO: Get patients with the greatest variability in tags or treatment; time spent on patient; time spent on tool decision or tag decision; return visits to patient and tag changes