In [1]:

%pprint
import sys
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

import os
from pandas import DataFrame
import pandas as pd
from notebook_utils import NotebookUtilities
import matplotlib.pyplot as plt
import re
import humanize
from datetime import timedelta

nu = NotebookUtilities(data_folder_path=os.path.abspath('../data'))

In [3]:

# Get all CSVs into one data frame
if nu.pickle_exists('frvrs_logs_df'):
    frvrs_logs_df = nu.load_object('frvrs_logs_df')
    print(frvrs_logs_df.shape) # (842663, 112)
    # df = frvrs_logs_df.sample(4).dropna(axis='columns', how='all')
    # display(df.T)

(829277, 113)



## Abstract Final Actions


1.  **Create new sample with clear count of responders and provide final number of responders here: The total number of responders in this data set was as follows: <u data-renderer-mark="true">362</u>**

    1.  Exclude training session and aborted session CSV files from dataset

    2.  Exclude CSV files with more than one triage run

In [4]:

# Loop through each unique file and scene combination
time_groups_dict = {}
mask_series = (frvrs_logs_df.scene_type == 'Triage') & (frvrs_logs_df.is_scene_aborted == False)
for (file_name), df1 in frvrs_logs_df[mask_series].groupby('file_name'):
    actions_list = []
    
    # Add the scene type for this run
    for (scene_index), df2 in df1.groupby('scene_index'):
        scene_type = df2.scene_type.unique().tolist()
        if len(scene_type) != 1: raise
        else: scene_type = scene_type[0]
        actions_list.append(scene_type)
    
    time_groups_dict[file_name] = actions_list
triage_count_df = pd.DataFrame([{'file_name': k, 'triage_count': v.count('Triage')} for k, v in time_groups_dict.items()])
# display(triage_count_df.groupby('triage_count').size().to_frame().rename(columns={0: 'instances_count'}))

# Infer the number of responders from the files with one triage scene
print('''OSU has cleaned/revised the files. The most common problem was that we previously only had one "Gary" as our training so many of these had a "Gary" and then the triage run. (I am keying on "Mike" to exclude the training sessions.) At any rate, it was easier for them to just delete the "Gary" training session from the files. 

There were a few where OSU could not figure out what was going on, so they just deleted them. It is not worth it to fuss with them since we have plenty of data. (OSU noted these in the spreadsheet.)''')
mask_series = (triage_count_df.triage_count == 1)
responders_list = triage_count_df[mask_series].file_name.tolist()
print(
    '\nExcluding training and aborted runs from the dataset, the total number of responders in this data set after the'
    f' OSU revision of the double-triaged files is now {len(responders_list)}'
    ' (assuming responders are represented in one and only one file with one training run in it).'
)

OSU has cleaned/revised the files. The most common problem was that we previously only had one "Gary" as our training so many of these had a "Gary" and then the triage run. (I am keying on "Mike" to exclude the training sessions.) At any rate, it was easier for them to just delete the "Gary" training session from the files. 

There were a few where OSU could not figure out what was going on, so they just deleted them. It is not worth it to fuss with them since we have plenty of data. (OSU noted these in the spreadsheet.)

Excluding training and aborted runs from the dataset, the total number of responders in this data set after the OSU revision of the double-triaged files is now 389 (assuming responders are represented in one and only one file with one training run in it).


In [5]:

# Store the new feature
mask_series = frvrs_logs_df.file_name.isin(responders_list)
new_column_name = 'is_a_one_triage_file'
if (new_column_name not in frvrs_logs_df.columns):
    frvrs_logs_df[new_column_name] = False
    frvrs_logs_df.loc[mask_series, new_column_name] = True
    nu.store_objects(frvrs_logs_df=frvrs_logs_df)
    print(frvrs_logs_df.shape) # (842663, 111)
columns_list = ['session_uuid', 'file_name', 'logger_version', 'scene_index', 'scene_type', 'is_scene_aborted', 'is_a_one_triage_file']
display(nu.get_minority_combinations(frvrs_logs_df[mask_series][columns_list], ['scene_type', 'is_a_one_triage_file']))

Unnamed: 0,session_uuid,file_name,logger_version,scene_index,scene_type,is_scene_aborted,is_a_one_triage_file
113870,33f3d2d4-49b4-43cf-80d4-cd7d5993aca4,Users/DaveBabbitt/Documents/GitHub/itm-analysi...,1.3,0,Orientation,False,True
2312,40efaf29-70b3-435a-8847-bcd4235720be,Users/DaveBabbitt/Documents/GitHub/itm-analysi...,1.3,1,Triage,False,True


In [6]:

# Save the time spent on the first and last triage of the files with two scenes to Excel
file_path = '../saves/xlsx/double_runs.xlsx'
if not os.path.exists(file_path):
    mask_series = (triage_count_df.triage_count == 2)
    file_names_list = triage_count_df[mask_series].file_name.tolist()
    mask_series = frvrs_logs_df.file_name.isin(file_names_list) & (frvrs_logs_df.scene_type == 'Triage')
    rows_list = []
    for (file_name), df1 in frvrs_logs_df[mask_series].groupby('file_name'):
        min_triage = df1.scene_index.min()
        mask_series = (frvrs_logs_df.scene_index == min_triage) & (frvrs_logs_df.file_name == file_name)
        df = frvrs_logs_df[mask_series]
        min_start = df.elapsed_time.min()
        min_stop = df.elapsed_time.max()
        min_elapsed = min_stop - min_start
        
        max_triage = df1.scene_index.max()
        mask_series = (frvrs_logs_df.scene_index == max_triage) & (frvrs_logs_df.file_name == file_name)
        df = frvrs_logs_df[mask_series]
        max_start = df.elapsed_time.min()
        max_stop = df.elapsed_time.max()
        max_elapsed = max_stop - max_start
    
        row_dict = {}
        row_dict['file_name'] = file_name
        row_dict['min_elapsed'] = min_elapsed
        row_dict['max_elapsed'] = max_elapsed
        rows_list.append(row_dict)
    df = DataFrame(rows_list)
    df['elapsed_diff'] = df.max_elapsed - df.min_elapsed
    df.file_name = df.file_name.map(lambda x: nu.get_new_file_name(x))
    
    # Save the DataFrame to an Excel file with the index
    df.sort_values('elapsed_diff').set_index('file_name').applymap(
        lambda x: nu.format_timedelta(timedelta(milliseconds=int(x))), na_action='ignore'
    ).to_excel(file_path, index=True)


2.  **Take the newly created data set and compute the following statistics:**


    a.  Calculate triage time defined as “time start of session until time end of session”

        Median = 7:38, SD = ±2:27, IQR = (6:13, 8:59)

In [7]:

# Get the run's entire history
mask_series = (frvrs_logs_df.scene_type == 'Triage') & (frvrs_logs_df.is_scene_aborted == False) & (frvrs_logs_df.is_a_one_triage_file == True)
groupby_columns = ['session_uuid', 'scene_index']
rows_list = []
for (session_uuid, scene_index), df1 in frvrs_logs_df[mask_series].groupby(groupby_columns):
    row_dict = {}
    for cn in groupby_columns: row_dict[cn] = eval(cn)
    time_start = df1.elapsed_time.min()
    row_dict['time_start'] = time_start
    time_stop = df1.elapsed_time.max()
    row_dict['time_stop'] = time_stop
    row_dict['triage_time'] = time_stop - time_start
    rows_list.append(row_dict)
df = DataFrame(rows_list)
nu.show_time_statistics(df, ['triage_time'])

Unnamed: 0,mean,mode,median,SD,min,25%,50%,75%,max
triage_time,7:36,10:22,7:38,±2:27,11 sec,6:13,7:38,8:59,15:52



    b.  Calculate time to hemorrhage control for scene defined as “time of start scene to time last hemorrhage control (last tourniquet applied)”

        Median = 4:54, SD = ±1:50, IQR = (4:01, 6:12)

In [14]:

base_mask_series = (frvrs_logs_df.scene_type == 'Triage') & (frvrs_logs_df.is_scene_aborted == False) & (frvrs_logs_df.is_a_one_triage_file == True)

# Iterate through each patient in each run in each file
groupby_columns = ['session_uuid', 'scene_index']
rows_list = []
mask_series = base_mask_series & frvrs_logs_df.tool_applied_sender.isin(['AppliedTourniquet', 'AppliedPackingGauze'])
for (session_uuid, scene_index), injury_treated_df in frvrs_logs_df[mask_series].groupby(groupby_columns):
    row_dict = {}
    for cn in groupby_columns: row_dict[cn] = eval(cn)
    
    # Get the entire history of this scene for the run start
    mask_series = True
    for cn in groupby_columns: mask_series &= (frvrs_logs_df[cn] == eval(cn))
    scene_df = frvrs_logs_df[mask_series]
    time_start = scene_df.elapsed_time.min()
    row_dict['scene_start'] = time_start
    
    for patient_id, patient_df in scene_df.groupby('patient_id'):
        row_dict['patient_id'] = patient_id
        
        # Is this patient bleeding?
        bleeding_mask_series = patient_df.injury_treated_required_procedure.isin(['tourniquet', 'woundpack'])
        if patient_df[bleeding_mask_series].shape[0]:
            
            # Get the time of patient engagement
            mask_series = patient_df.action_type.isin(['PATIENT_ENGAGED'])
            patient_engaged = patient_df[mask_series].elapsed_time.min()
            row_dict['patient_engaged'] = patient_engaged
            
            # Get the entire history of these injuries
            injury_ids_list = patient_df[bleeding_mask_series].injury_id.tolist()
            injury_mask_series = True
            for cn in groupby_columns: injury_mask_series &= (frvrs_logs_df[cn] == eval(cn))
            injury_mask_series &= (frvrs_logs_df.patient_id == patient_id)
            injury_mask_series &= frvrs_logs_df.injury_id.isin(injury_ids_list)
            injury_records_df = frvrs_logs_df[injury_mask_series]
            
            # Was the bleeding treated?
            mask_series = (injury_records_df.injury_treated_injury_treated == True)
            if injury_records_df[mask_series].shape[0]:
                
                # Get the last tourniquet applied
                time_stop = injury_records_df[mask_series].elapsed_time.max()
                row_dict['tool_applied'] = time_stop
                
                # Get the time elapsed
                row_dict['run_control_time'] = time_stop - time_start
                row_dict['engaged_control_time'] = time_stop - patient_engaged
    
    rows_list.append(row_dict)
run_control_time_df = DataFrame(rows_list)

In [15]:

if 'run_control_time' in run_control_time_df.columns: display(run_control_time_df[~run_control_time_df.run_control_time.isnull()])

Unnamed: 0,session_uuid,scene_index,scene_start,patient_id,patient_engaged,tool_applied,run_control_time,engaged_control_time
0,009b848c-ea64-4f22-bd40-711213a3d327,0,296666,Mike_7 Root,381184.0,557878.0,261212.0,176694.0
1,00b2e9d8-c3bb-4d89-92c1-abce5c4a0655,1,436309,Mike_7 Root,676778.0,686541.0,250232.0,9763.0
2,02f6a42e-5211-40ea-8384-824a83ab6760,0,2141,Mike_7 Root,460081.0,469173.0,467032.0,9092.0
3,03b398c5-6fc8-459c-9662-ce1e0babe4b1,0,4275,Mike_7 Root,183837.0,204823.0,200548.0,20986.0
4,040ba585-ccae-45ef-b4bd-a0e35ec222b0,1,221963,Mike_7 Root,276759.0,306314.0,84351.0,29555.0
...,...,...,...,...,...,...,...,...
367,fe077b96-8e52-4e1a-8ec7-e5c5f46e65c5,0,197304,Mike_7 Root,512675.0,476755.0,279451.0,20476.0
368,fe2602f0-e9e8-4913-8bf3-806852ce738a,1,136772,Mike_7 Root,379593.0,398143.0,261371.0,18550.0
369,ff4c3532-5924-45b6-9a6a-d0d3274b6ef0,1,96968,Military Mike Jungle Scout_1_3 Root,145380.0,150881.0,53913.0,5501.0
370,ff97fc56-c803-4aa7-b419-35e0ac17d0b3,1,197239,Mike_7 Root,358017.0,367218.0,169979.0,9201.0


In [16]:

columns_list = ['run_control_time']
df = DataFrame(rows_list).groupby(groupby_columns)[columns_list].max()
nu.show_time_statistics(df, columns_list)

Unnamed: 0,mean,mode,median,SD,min,25%,50%,75%,max
run_control_time,3:56,25 sec,3:51,±1:44,25 sec,2:41,3:51,4:52,10:49



    c.  Calculate hemorrhage control per patient defined as “time patient engaged to time tool applied (tourniquet) for that patient”

        1.  Median = XX, SD = X:XX, IQR = X:XX, X:XX)

In [73]:

columns_list = ['engaged_control_time']
df = DataFrame(rows_list).groupby(groupby_columns)[columns_list].mean()
nu.show_time_statistics(df, columns_list)

Unnamed: 0,mean,mode,median,SD,min,25%,50%,75%,max
engaged_control_time,21,-1:58,13,±25,-1:58,9,13,21,4:07



3.  **Calculate the proportion of responders that consistently evaluated patients appropriately in this order: still, able to wave, and able to walk.**

    a.  Responders who evaluated patients in correct order: **<u data-renderer-mark="true">6.94%</u>**

In [79]:

columns_list = ['scene_type', 'is_scene_aborted', 'is_a_one_triage_file']
frvrs_logs_df[columns_list].drop_duplicates().sort_values(columns_list)

Unnamed: 0,scene_type,is_scene_aborted,is_a_one_triage_file
56265,Orientation,False,False
0,Orientation,False,True
56267,Triage,False,False
228,Triage,False,True


In [102]:

import numpy as np

engagment_sort_df = nu.load_object('engagment_sort_df')
groupby_columns = ['session_uuid', 'scene_index']
rows_list = []
idx_list = []
base_mask_series = (engagment_sort_df.scene_type == 'Triage') & (engagment_sort_df.is_scene_aborted == False) & (engagment_sort_df.is_a_one_triage_file == True)
for (session_uuid, scene_index), scene_df in engagment_sort_df[base_mask_series].groupby(groupby_columns):
    row_dict = {}
    for cn in groupby_columns: row_dict[cn] = eval(cn)
        
    mask_series = (scene_df.rsquared_adj == 1.0)
    count_correct = scene_df[mask_series].shape[0]
    row_dict['count_correct'] = count_correct
    
    mask_series = (scene_df.rsquared_adj < 1.0)
    count_incorrect = scene_df[mask_series].shape[0]
    row_dict['count_incorrect'] = count_incorrect

    total_count = count_correct + count_incorrect
    row_dict['total_count'] = total_count
    
    try: percentage_correct = 100*count_correct/total_count
    except ZeroDivisionError:
        percentage_correct = np.nan
        idx_list.append(scene_df.index.tolist()[0])
    row_dict['percentage_correct'] = percentage_correct
    
    rows_list.append(row_dict)
df = DataFrame(rows_list)

In [103]:

mask_series = engagment_sort_df.index.isin(idx_list)
engagment_sort_df[mask_series]

Unnamed: 0,session_uuid,scene_index,logger_version,is_scene_aborted,scene_type,is_a_one_triage_file,last_threat_engaged,last_walker_engaged,rsquared_adj,last_still_engaged,last_waver_engaged
34,0950437e-94b0-45bf-8d11-dd446fd0e4a2,0,1.0,False,Triage,True,,68436.0,,,
59,1066671d-2a1d-4744-b66f-e4b48548701f,0,1.3,False,Triage,True,,,,,
73,158e6365-673b-4030-8b36-6704be5996a2,0,1.0,False,Triage,True,,,,,
110,2310f107-d9d2-418e-a2d7-dd7a17924544,0,1.0,False,Triage,True,,,,,
242,5325cdbf-d627-4aba-b5b4-9a62240d599f,0,1.0,False,Triage,True,,38403.0,,64732.0,
243,54aaf31a-22bc-46f2-a810-8564161bf8d0,0,1.0,False,Triage,True,,,,,
251,5c2a444a-9c8d-4c65-bef2-c5f47f6b258d,0,1.0,False,Triage,True,,,,,68077.0
260,5da6af4d-f3bc-4e05-9c5b-cb7d5aa68e7e,1,1.3,False,Triage,True,,,,97371.0,104616.0
356,7c2549d4-97a4-4389-bd03-029396714f59,0,1.3,False,Triage,True,,,,,
407,8ec8afba-8533-4915-898f-5769c1258c61,0,1.3,False,Triage,True,,,,241847.0,


In [93]:

columns_list = ['total_count', 'percentage_correct']
nu.get_minority_combinations(df, columns_list)

Unnamed: 0,session_uuid,scene_index,count_correct,count_incorrect,total_count,percentage_correct
38,1cfebb9a-6000-4742-81be-6103ebe168bc,1,1,0,1,100.0
294,d8fe95a5-c1fb-4239-8b42-12d28c35e111,0,0,1,1,0.0


In [106]:

ave_percentage_correct = df.percentage_correct.mean()#, 100*df.count_correct.sum()/df.total_count.sum()
print(
    f'Responders who evaluated patients in correct order: {ave_percentage_correct:.2f}%'
    f' (100*{df.count_correct.sum()}/{df.total_count.sum()}, {len(responders_list)-df.total_count.sum()} did not have patients with patient_engaged_sort designations).'
    )

Responders who evaluated patients in correct order: 6.94% (100*24/346, 16 did not have patients with patient_engaged_sort designations).
