In [1]:

%pprint
import sys
# Put the relative ../py folder on the module search path, at position 1;
# presumably this is what makes the notebook_utils import work (TODO confirm)
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

import os
from pandas import DataFrame
import pandas as pd
from notebook_utils import NotebookUtilities
import matplotlib.pyplot as plt
import re
import humanize
from datetime import timedelta

# Create the utilities object used throughout the notebook, handing it the absolute path of ../data
nu = NotebookUtilities(data_folder_path=os.path.abspath('../data'))

In [3]:

# Get the data frame of all the CSVs from its pickle and report its shape
if nu.pickle_exists('frvrs_logs_df'):
    frvrs_logs_df = nu.load_object('frvrs_logs_df')
    print(frvrs_logs_df.shape)
else:
    
    # Say so now rather than letting a later cell fail with an unexplained NameError
    print("'frvrs_logs_df' pickle not found; frvrs_logs_df was not loaded and cells that read it may fail")

(832366, 109)



# Time Spent on Task

Are all mass casualty tasks (wound packing, needle decompression, tourniquet application, triage tag determination) similar in difficulty based on time spent?

In [62]:

# Get time delta for each action type: one row per logged action, recording how long
# after the previous action on the same patient (or after the patient's first log
# entry, for the first action) it happened
if nu.pickle_exists('time_spent_df'):
    time_spent_df = nu.load_object('time_spent_df')
else:
    
    # Define lists of action types
    action_types_list = [
        'PULSE_TAKEN', 'INJURY_TREATED', 'TOOL_APPLIED', 'TAG_APPLIED'
    ]
    
    # Define columns for grouping
    groupby_columns = ['session_uuid', 'time_group', 'patient_id']
    
    # Group by patient ID within each session and time group
    gb = frvrs_logs_df.sort_values(['elapsed_time']).groupby(groupby_columns)
    rows_list = []
    for group_key, df1 in gb:
        
        # Get the logger version; .item() fails unless the group holds exactly one distinct value
        logger_version = df1.logger_version.unique().item()
        
        # Get the first time the patient is noticed
        first_notice = df1.elapsed_time.min()
        previous_time = first_notice
        
        # Get the actions with the patient; groupby sorts its keys, so the groups
        # arrive in elapsed time order without sorting the rows a second time
        mask_series = df1.action_type.isin(action_types_list)
        for (elapsed_time, action_type), df2 in df1[mask_series].groupby(
            ['elapsed_time', 'action_type']
        ):
            row_dict = {'logger_version': logger_version}
            
            # Add the groupby columns to the row dictionary, pairing each
            # column name with its value in the group key
            row_dict.update(zip(groupby_columns, group_key))
            
            # Add the time spent for each action
            row_dict['first_notice'] = first_notice
            row_dict['action_type'] = action_type
            
            # NOTE(review): squeeze() only yields a scalar when df2 has a single row; if two
            # actions of one type share an elapsed_time it yields a Series - confirm that cannot happen
            if action_type == 'TAG_APPLIED':
                row_dict['tag_applied_type'] = df2.tag_applied_type.squeeze()
            elif action_type == 'TOOL_APPLIED':
                row_dict['tool_applied_type'] = df2.tool_applied_type.squeeze()
            row_dict['previous_time'] = previous_time
            row_dict['action_time'] = elapsed_time
            row_dict['time_spent'] = elapsed_time - previous_time
            
            # The next action on this patient is timed from this one
            previous_time = elapsed_time
            
            # Add the row dictionary to the list
            rows_list.append(row_dict)
    
    # Create a data frame from the list of row dictionaries
    time_spent_df = DataFrame(rows_list)
    
    # Storing the result is switched off; presumably uncommenting this lets the
    # pickle_exists branch above reuse it on the next run (TODO confirm)
    # nu.store_objects(time_spent_df=time_spent_df)

In [63]:

# Show up to four randomly picked rows as columns (transposed), leaving out
# any column that is NaN for every picked row
display(
    time_spent_df
    .sample(n=min(4, len(time_spent_df)))
    .dropna(axis='columns', how='all')
    .T
)

Unnamed: 0,24988,12750,4467,12088
logger_version,1.0,1.0,1.0,1.0
session_uuid,f807c3e2-6cc7-4670-8488-4dd13431cc15,73df0a9b-795b-41e2-97a0-6079fb7ca739,251a4532-ff1f-4182-91ce-4215786a339a,724fdf45-3165-43b8-b9ca-dc07102d2886
time_group,1,1,1,1
patient_id,Lily_2 Root,Helga_10 Root,Gloria_6 Root,Mike_0 Root
first_notice,26756,29521,27244,55908
action_type,INJURY_TREATED,PULSE_TAKEN,TAG_APPLIED,PULSE_TAKEN
previous_time,26756,431342,156008,286664
action_time,59128,438826,169892,289662
time_spent,32372,7484,13884,2998
tag_applied_type,,,yellow,


In [65]:

def get_mean_time_spent(column_name):
    """
    Display the mean time_spent for each value of a time_spent_df column, smallest mean first.
    
    Parameters:
        column_name (str): Name of the time_spent_df column to group by. Rows
            where this column is null are left out.
    
    Returns:
        None: The result is shown with display() as a data frame of
            column_name and time_spent, where each mean is treated as a
            number of milliseconds and rendered as a human-readable duration.
    """
    
    # Leave out the rows with no value in the grouping column in one vectorized
    # step; they could never form a group of their own anyway
    labeled_df = time_spent_df.dropna(subset=[column_name])
    
    # Average the time spent per value, shortest first
    mean_series = labeled_df.groupby([column_name]).time_spent.mean().sort_values()
    
    # Render the means as readable durations
    display(mean_series.map(
        lambda x: humanize.precisedelta(timedelta(milliseconds=x))
    ).reset_index())

In [67]:

# Get mean time from last action for each action type, i.e. the average time_spent (time since
# the previous action on the same patient, or since the patient was first noticed)
get_mean_time_spent('action_type')

Unnamed: 0,action_type,time_spent
0,TOOL_APPLIED,12.28 seconds
1,PULSE_TAKEN,48.06 seconds
2,TAG_APPLIED,48.24 seconds
3,INJURY_TREATED,1 minute and 20.55 seconds


In [68]:

# Get mean time from last action for each tool type; only rows that
# carry a tool_applied_type value take part
get_mean_time_spent('tool_applied_type')

Unnamed: 0,tool_applied_type,time_spent
0,Gauze_Pack,6.49 seconds
1,Tourniquet,10.26 seconds
2,Gauze_Dressing,20.22 seconds
3,Naso,30.91 seconds
4,Needle,51.14 seconds


In [69]:

# Get mean time from last action for each tag type; only rows that
# carry a tag_applied_type value take part
get_mean_time_spent('tag_applied_type')

Unnamed: 0,tag_applied_type,time_spent
0,red,19.95 seconds
1,yellow,35.71 seconds
2,black,37.00 seconds
3,gray,46.63 seconds
4,green,1 minute and 56.26 seconds


In [55]:

# Tabulate, per patient, how much the time spent varies (standard deviation) for each action type
columns_dict = {
    'TOOL_APPLIED': 'variability_in_applying_tools',
    'PULSE_TAKEN': 'variability_in_taking_pulses',
    'TAG_APPLIED': 'variability_in_applying_tags',
    'INJURY_TREATED': 'variability_in_treating_injuries',
}


def _format_std(milliseconds):
    """Render a standard deviation given in milliseconds as a '±'-prefixed readable duration."""
    return '±' + humanize.precisedelta(timedelta(milliseconds=milliseconds))


# Start from one row per distinct patient ID
patient_ids_list = time_spent_df.patient_id.unique().tolist()
actions_variability_df = DataFrame(patient_ids_list, columns=['patient_id'])
for action_type, column_name in columns_dict.items():
    
    # Take this action type's rows, discarding every patient that has a missing time_spent
    action_rows_df = time_spent_df[time_spent_df.action_type == action_type]
    complete_rows_df = action_rows_df.groupby(['patient_id']).filter(
        lambda patient_df: patient_df.time_spent.notnull().all()
    )
    
    # Standard deviation per patient, largest first; patients whose deviation is NaN are dropped
    std_series = complete_rows_df.groupby(['patient_id']).time_spent.std()
    std_series = std_series.sort_values(ascending=False).dropna()
    
    # Format the deviations and attach them as this action type's column
    std_df = std_series.map(_format_std).rename(column_name).reset_index()
    actions_variability_df = actions_variability_df.merge(std_df, on='patient_id', how='outer')
actions_variability_df.head(5)

Unnamed: 0,patient_id,variability_in_applying_tools,variability_in_taking_pulses,variability_in_applying_tags,variability_in_treating_injuries
0,Gary_0 Root,±10.49 seconds,±14.04 seconds,±12.53 seconds,±27.85 seconds
1,Bob_0 Root,±1 minute and 33.48 seconds,±1 minute and 43.82 seconds,±1 minute and 30.22 seconds,
2,Bob_9 Root,,±3 minutes and 16.06 seconds,±4 minutes and 4.01 seconds,
3,Gary_1 Root,±1 minute and 18.74 seconds,±1 minute and 21.21 seconds,±1 minute and 7.81 seconds,±58.91 seconds
4,Gary_3 Root,±2 minutes and 10.45 seconds,±1 minute and 9.49 seconds,±40.53 seconds,±1 minute and 11.23 seconds


In [None]:

# TODO: Get patients with the greatest variability in tags or treatment; time spent on patient;
# time spent on tool decision or tag decision; return visits to patient and tag changes