In [1]:

# Set up notebook
%pprint
# osp is imported here because no earlier cell defines it; without this the
# line below raises NameError on a fresh kernel.
import os.path as osp
import sys

# Make the sibling ../py folder importable (inserted once, at position 1)
if (osp.join('..', 'py') not in sys.path): sys.path.insert(1, osp.join('..', 'py'))

Pretty printing has been turned OFF


In [2]:

# load libraries
from FRVRS import (fu, nu, display, read_excel, nan, Series, DataFrame)
from datetime import date, timedelta
import os
from IPython.display import HTML
import pandas as pd

In [3]:

# Load the four data frames by name through nu.load_data_frames
data_frames_dict = nu.load_data_frames(
    first_responder_master_registry_df='',
    first_responder_master_registry_file_stats_df='',
    first_responder_master_registry_scene_stats_df='',
    count_swaps_df='',
    verbose=False,
)

# For each frame print: name, row count, distinct key count, column count
# (trailing comments are the values recorded in this notebook's output)
triage_paper_df = data_frames_dict['first_responder_master_registry_df']
print(f'triage_paper_df {triage_paper_df.shape[0]} {triage_paper_df.session_uuid.nunique()} {triage_paper_df.shape[1]}') # 367627 290 111

file_stats_df = data_frames_dict['first_responder_master_registry_file_stats_df']
print(f'file_stats_df {file_stats_df.shape[0]} {file_stats_df.file_name.nunique()} {file_stats_df.shape[1]}') # 290 290 19

scene_stats_df = data_frames_dict['first_responder_master_registry_scene_stats_df']
print(f'scene_stats_df {scene_stats_df.shape[0]} {scene_stats_df.session_uuid.nunique()} {scene_stats_df.shape[1]}') # 420 290 48

count_swaps_df = data_frames_dict['count_swaps_df']
print(f'count_swaps_df {count_swaps_df.shape[0]} {count_swaps_df.session_uuid.nunique()} {count_swaps_df.shape[1]}') # 315 289 3

triage_paper_df 367627 290 111
file_stats_df 290 290 19
scene_stats_df 420 290 48
count_swaps_df 315 289 3


In [7]:

# Select the rows that have a patient_salt value, show the distinct values,
# and end the cell with the minimum of those distinct values
mask_series = triage_paper_df.patient_salt.notnull()
display(triage_paper_df.loc[mask_series, 'patient_salt'].unique())
triage_paper_df.loc[mask_series, 'patient_salt'].unique().min()

['EXPECTANT', 'DEAD', 'IMMEDIATE', 'DELAYED', 'MINIMAL']
Categories (5, object): ['DEAD' < 'EXPECTANT' < 'IMMEDIATE' < 'DELAYED' < 'MINIMAL']

'DEAD'

In [8]:

# Cast the selected patient_salt values to fu.salt_category_order and show the minimum.
# NOTE(review): mask_series is not defined in this cell; it must still hold the
# non-null patient_salt mask from the preceding cell.
patient_salts_srs = Series(triage_paper_df.loc[mask_series, 'patient_salt']).astype(fu.salt_category_order)
patient_salts_srs.min()

'DEAD'

In [5]:

# Responder categories kept in the analysis (used further down to filter elevens_df)
groups_list = ['Paramedic', 'EMT-Basic', 'EM Resident', 'Medical Student']
# Scene filter that accepts every scene.
# NOTE(review): no visible cell passes this on; get_elevens_dataframe below is given None instead.
patient_count_filter_fn = lambda scene_df: True
# Patient IDs to exclude from elevens_df further down; loaded by name via nu.load_object
nongrouped_patients_list = nu.load_object('nongrouped_patients_list')

In [9]:

# Build elevens_df with fu.get_elevens_dataframe from the log, file-stats, and scene-stats
# frames, requesting the listed needed_columns and no patient-count filter
elevens_df = fu.get_elevens_dataframe(
    triage_paper_df,
    file_stats_df,
    scene_stats_df,
    needed_columns=[
        'scene_type', 'is_scene_aborted', 'is_a_one_triage_file', 'responder_category', 'responder_type', 'overall_category',
        'file_name', 'session_file_name', 'encounter_layout'
    ],
    patient_count_filter_fn=None
)
print('elevens_df', elevens_df.shape[0], elevens_df.session_uuid.nunique(), elevens_df.shape[1]) # 365543 289 120

# Drop the rows whose patient_id is in nongrouped_patients_list
mask_series = ~elevens_df.patient_id.isin(nongrouped_patients_list)
elevens_df = elevens_df[mask_series]
print('elevens_df', elevens_df.shape[0], elevens_df.session_uuid.nunique(), elevens_df.shape[1]) # 365521 289 120

# Show the most common encounter layouts, then drop the 'DCEMS 11' rows.
# Rows with any other layout value, including a missing one, are kept, so this
# is an exclusion of 'DCEMS 11' rather than a strict "First 11 only" filter.
mask_series = (elevens_df.encounter_layout == 'DCEMS 11')
display(elevens_df.groupby('encounter_layout').size().to_frame().rename(columns={0: 'record_count'}).sort_values(
    'record_count', ascending=False
).head(5))
elevens_df = elevens_df[~mask_series]
print('elevens_df', elevens_df.shape[0], elevens_df.session_uuid.nunique(), elevens_df.shape[1]) # 365521 289 120

# Keep only the responder categories in groups_list. The explicit copy makes elevens_df an
# independent frame, so later cells can add columns with .loc without a SettingWithCopyWarning.
mask_series = elevens_df.responder_category.isin(groups_list)
elevens_df = elevens_df[mask_series].copy()
print('elevens_df', elevens_df.shape[0], elevens_df.session_uuid.nunique(), elevens_df.shape[1]) # 358586 282 120

elevens_df 365543 289 120
elevens_df 365521 289 120


Unnamed: 0_level_0,record_count
encounter_layout,Unnamed: 1_level_1
First 11,365238


elevens_df 365521 289 120
elevens_df 358586 282 120


In [10]:

# Load the optimal-order table by name and report its shape
data_frames_dict = nu.load_data_frames(first11_optimal_order_df='', verbose=False)
first11_optimal_order_df = data_frames_dict['first11_optimal_order_df']
print(f'first11_optimal_order_df {first11_optimal_order_df.shape}') # (12, 7)

first11_optimal_order_df (12, 7)


In [11]:

# Build the Patient -> Group lookup and the sorted patient list from the rows
# of the optimal-order table that actually name a patient
mask_series = first11_optimal_order_df.Patient.notnull()
priority_group_dict = first11_optimal_order_df[mask_series].set_index('Patient').Group.to_dict()
first11_patients_list = sorted(first11_optimal_order_df[mask_series].Patient.unique())
print(priority_group_dict)

# Write the priority group onto the elevens_df rows whose patient is in that list
mask_series = elevens_df.patient_id.isin(first11_patients_list)
elevens_df.loc[mask_series, 'priority_group'] = elevens_df.loc[mask_series, 'patient_id'].map(priority_group_dict)

{'Gary_3 Root': 1, 'Lily_2 Root': 1, 'Bob_0 Root': 1, 'Gary_1 Root': 1, 'Mike_5 Root': 1, 'Lily_4 Root': 1, 'Mike_7 Root': 1, 'Gloria_6 Root': 2, 'Bob_9 Root': 3, 'Gloria_8 Root': 3, 'Helga_10 Root': 3, 'Gary_9 Root': 3}


In [15]:

# Load the ANOVA frame and replace missing scene_id values with 0 so every row falls in a group
data_frames_dict = nu.load_data_frames(swaps_walk_wave_anova_df='', verbose=False)
anova_df = data_frames_dict['swaps_walk_wave_anova_df']
anova_df['scene_id'] = anova_df['scene_id'].fillna(0)
for (session_uuid, scene_id), idx_df in anova_df.groupby(fu.scene_groupby_columns):

    # Get the whole scene history: pair each groupby column with its value in the group key.
    # (Explicit pairing replaces eval(cn), which only worked because the loop variables
    # happened to be named after the columns.)
    mask_series = True
    for cn, group_value in zip(fu.scene_groupby_columns, (session_uuid, scene_id)):
        mask_series &= (elevens_df[cn] == group_value)
    scene_df = elevens_df[mask_series]

    # Add the swap measure: translate each sequence element back to its sort_dict key,
    # then count the swaps needed to turn the actual order into the ideal order
    actual_sequence, ideal_sequence, sort_dict = fu.get_actual_and_ideal_priority_group_sequences(scene_df)
    unsort_dict = {v1: k for k, v in sort_dict.items() for v1 in v}
    swaps_to_perfect_order = nu.count_swaps_to_perfect_order(
        [unsort_dict[i] for i in ideal_sequence], [unsort_dict[a] for a in actual_sequence]
    )
    anova_df.loc[idx_df.index, 'swaps_to_perfect_order'] = swaps_to_perfect_order

    # TODO: the walker and waver command columns are not populated yet
    # Add the walker issue command
    # is_walk_command_issued = fu.get_walk_value(scene_df)
    # anova_df.loc[idx_df.index, 'walk_command_issued'] = is_walk_command_issued

    # Add the waver issue command
    # is_wave_command_issued = fu.get_wave_value(scene_df)
    # anova_df.loc[idx_df.index, 'wave_command_issued'] = is_wave_command_issued

# Persist the updated frame under the same name and show the distribution of swap counts
nu.save_data_frames(swaps_walk_wave_anova_df=anova_df, verbose=False)
print()
display(anova_df.swaps_to_perfect_order.value_counts().sort_index())




0.0     57
1.0    135
2.0     44
3.0     46
4.0      2
5.0      4
Name: swaps_to_perfect_order, dtype: int64

In [15]:

# Read the ANOVA workbook into a data frame; the path is relative to the
# notebook's working directory
file_path = '../data/xlsx/anova290_df_Apr24_Final.xlsx'
anova_apr24_final_df = pd.read_excel(file_path)

In [6]:

def get_mean_percentage_tag_correct(percentage_tag_correct_df, session_uuid, scene_id, responder_category):
    """
    Return the mean percentage_tag_correct for one scene and responder category.

    Parameters:
        percentage_tag_correct_df (DataFrame): Must have session_uuid, scene_id,
            responder_category and percentage_tag_correct columns.
        session_uuid: Session to match.
        scene_id: Scene to match.
        responder_category: Responder category to match.

    Returns:
        The mean of percentage_tag_correct over the matching rows, or 0 when
        no row matches all three keys.
    """
    is_match = (
        (percentage_tag_correct_df.session_uuid == session_uuid)
        & (percentage_tag_correct_df.scene_id == scene_id)
        & (percentage_tag_correct_df.responder_category == responder_category)
    )

    # No matching rows: fall back to 0 rather than NaN
    if not is_match.any():
        return 0

    return percentage_tag_correct_df[is_match].percentage_tag_correct.mean()

In [7]:

# Create the tag-to-SALT data frame, first checking that every column named in
# fu.patient_groupby_columns is present. This is a subset check rather than a
# hard-coded count of 3, so it stays correct if that list changes.
assert set(fu.patient_groupby_columns).issubset(elevens_df.columns), "You don't have all the patient columns"
tag_to_salt_df = fu.get_is_tag_correct_dataframe(elevens_df, groupby_column='responder_category')

In [8]:

# Get the percentage tag correct counts for each scene for each group
# (built from tag_to_salt_df, grouped by responder_category)
percentage_tag_correct_df = fu.get_percentage_tag_correct_dataframe(tag_to_salt_df, groupby_column='responder_category')

In [9]:

# Flag the scenes that scored exactly 0 percent correct
remove_mask_series = (percentage_tag_correct_df.percentage_tag_correct == 0)

# Show every scene belonging to a session that has at least one such 0-percent scene
session_uuids_list = percentage_tag_correct_df.loc[remove_mask_series, 'session_uuid']
mask_series = percentage_tag_correct_df.session_uuid.isin(session_uuids_list)
display(percentage_tag_correct_df[mask_series])

# Drop only the 0-percent scenes themselves; the sessions' other scenes stay
percentage_tag_correct_df = percentage_tag_correct_df[~remove_mask_series]

Unnamed: 0,session_uuid,scene_id,responder_category,percentage_tag_correct
36,1e7d274d-f393-4888-9f28-4de53b0260e3,0,Paramedic,0.0
37,1e7d274d-f393-4888-9f28-4de53b0260e3,1,Paramedic,72.727273
171,9a6d2f32-10fb-40ea-bacc-dd6a7d2f4a13,0,Medical Student,0.0
172,9a6d2f32-10fb-40ea-bacc-dd6a7d2f4a13,1,Medical Student,36.363636
185,a7804ee3-6a1c-4462-957a-17976a540483,0,EM Resident,0.0
186,a7804ee3-6a1c-4462-957a-17976a540483,1,EM Resident,81.818182


In [12]:

# Compute error_type, removing "Not Tagged" from consideration
def f(df):
    """
    Look up the error type for one row of anova_error_types_df.

    The row's last_tag selects the row of fu.error_table_df and its max_salt
    selects the column. A last_tag that is not in the table's index yields nan.
    """
    if df.last_tag not in fu.error_table_df.index:
        return nan

    return fu.error_table_df[fu.error_table_df.index == df.last_tag][df.max_salt].squeeze()

# Total the patient counts per session, tag, SALT value, and responder category,
# then label each total with its error type
groupby_columns = ['session_uuid', 'last_tag', 'max_salt', 'responder_category']
anova_error_types_df = tag_to_salt_df.groupby(groupby_columns).patient_count.sum().to_frame().reset_index()
anova_error_types_df['error_type'] = anova_error_types_df.apply(f, axis=1)

# Convert the columns to the custom categorical type
anova_error_types_df['last_tag'] = anova_error_types_df['last_tag'].astype(fu.colors_category_order)
anova_error_types_df['max_salt'] = anova_error_types_df['max_salt'].astype(fu.salt_category_order)
anova_error_types_df['error_type'] = anova_error_types_df['error_type'].astype(fu.errors_category_order)

# Sort the Data Frame based on the custom categorical orders
anova_error_types_df = anova_error_types_df.sort_values(by=[*groupby_columns, 'error_type'])

In [13]:

# Get triage error rates
# (computed from anova_error_types_df, one grouping per session and responder category)
anova_error_rates_df = fu.create_triage_error_rates_dataframe(anova_error_types_df, groupby_columns=['session_uuid', 'responder_category'])

In [14]:

# Build one anova_df row per session: restrict to non-aborted Triage scenes, then group
# the rows by file, session, responder category and encounter layout
mask_series = (elevens_df.scene_type == 'Triage') & (elevens_df.is_scene_aborted == False)
gb = elevens_df[mask_series].sort_values(['session_uuid']).groupby(['file_name', 'session_file_name', 'session_uuid', 'responder_category', 'encounter_layout'])
rows_list = []
for i, ((file_name, session_file_name, session_uuid, responder_category, encounter_layout), session_df) in enumerate(gb):

    # Get the tuple with the largest percentage correct
    # (scenes are sorted ascending by their mean percentage, so [-1] is the best one; a scene
    # absent from percentage_tag_correct_df scores 0, and because sorted() is stable a tie
    # resolves to the scene that comes last in groupby order)
    largest_tuple = sorted(
        [(scene_df, scene_df.shape[0], scene_id) for scene_id, scene_df in session_df.groupby('scene_id')],
        key=lambda x: get_mean_percentage_tag_correct(percentage_tag_correct_df, session_uuid, x[2], responder_category)
    )[-1]
    
    # Unpack the chosen scene's rows and its scene_id (the row-count element is not used)
    responder_categories_df = largest_tuple[0]
    scene_id = largest_tuple[2]
    # scene_start = fu.get_scene_start(responder_categories_df)
    row_dict = {
        'ID': f'Responder {i+1}', 'Group': responder_category, 'file_name': file_name, 'session_file_name': session_file_name,
        'session_uuid': session_uuid, 'scene_id': scene_id
    }
    # Timing measures, each computed by a fu helper from the chosen scene's rows
    row_dict['time_to_triage_scene'] = fu.get_triage_time(responder_categories_df)
    row_dict['time_to_last_hemorrhage_controlled'] = fu.get_time_to_last_hemorrhage_controlled(responder_categories_df)
    
    # Duration of time from when the patient was first approached by the participant until
    # the time hemorrhage treatment was applied (with a tourniquet or wound packing)
    row_dict['time_to_hemorrhage_control_per_patient'] = fu.get_time_to_hemorrhage_control_per_patient(responder_categories_df)
    
    # Only when the chosen scene has percentage data are the percentage and the error rates added;
    # otherwise those keys are left out of row_dict for this session
    mask_series = (percentage_tag_correct_df.session_uuid == session_uuid) & (percentage_tag_correct_df.scene_id == scene_id)
    mask_series &= (percentage_tag_correct_df.responder_category == responder_category)
    if mask_series.any():
        row_dict['mean_percentage_tag_correct'] = percentage_tag_correct_df[mask_series].percentage_tag_correct.mean()
        
        # Add the triage errors
        # (squeeze() yields a scalar only when exactly one row matches; with zero or several
        # matches it stays a Series and the rate is skipped)
        mask_series = (anova_error_rates_df.responder_category == responder_category) & (anova_error_rates_df.session_uuid == session_uuid)
        over_triage_error_rate = anova_error_rates_df[mask_series].over_triage_error_rate.squeeze()
        if not isinstance(over_triage_error_rate, Series): row_dict['over_triage_error_rate'] = over_triage_error_rate
        under_triage_error_rate = anova_error_rates_df[mask_series].under_triage_error_rate.squeeze()
        if not isinstance(under_triage_error_rate, Series): row_dict['under_triage_error_rate'] = under_triage_error_rate
        critical_triage_error_rate = anova_error_rates_df[mask_series].critical_triage_error_rate.squeeze()
        if not isinstance(critical_triage_error_rate, Series): row_dict['critical_triage_error_rate'] = critical_triage_error_rate
    
    row_dict['encounter_layout'] = encounter_layout
    rows_list.append(row_dict)
anova_df = DataFrame(rows_list)
# Format three of the measures as strings (a missing value becomes the text 'nan');
# '.1g' and '.0g' both render one significant digit. under_triage_error_rate is left numeric.
anova_df.mean_percentage_tag_correct = anova_df.mean_percentage_tag_correct.apply(lambda x: f'{x:.8f}')
anova_df.over_triage_error_rate = anova_df.over_triage_error_rate.apply(lambda x: f'{x:.1g}')
anova_df.critical_triage_error_rate = anova_df.critical_triage_error_rate.apply(lambda x: f'{x:.0g}')

In [22]:

# Final column order for the combined ANOVA frame
columns_list = [
    'ID', 'Group', 'file_name', 'session_file_name', 'session_uuid', 'scene_id',
    'time_to_triage_scene', 'time_to_last_hemorrhage_controlled', 'time_to_hemorrhage_control_per_patient',
    'mean_percentage_tag_correct', 'over_triage_error_rate', 'under_triage_error_rate', 'critical_triage_error_rate',
    'encounter_layout'
]

# Left-join the scene key columns from anova_df onto the workbook frame by session,
# then keep just the columns above, in that order
anova_df = pd.merge(
    anova_apr24_final_df, anova_df[fu.scene_groupby_columns], on=['session_uuid'], how='left'
)[columns_list]


We are requesting some additional data and I'm hoping this is possible. When convenient, could you add to the anova_df data set one column for each measure below and indicate for each responder as follows:<ul>
    <li>swap measure: put the number of patient swaps (0,1,2,3,4,5)</li>
    <li>walker issue command: whether the responder issued the walk command (1) or not (0)</li>
    <li>waver issue command: whether the responder issued the wave command (1) or not (0)</li>
</ul>
So it should look like this:<table>
<tr><th>ID</th><th>swap measure</th><th>walker command</th><th>wave command</th></tr>
<tr><td>Responder 1</td><td>0</td><td>1</td><td>1</td></tr>
<tr><td>Responder 2</td><td>2</td><td>1</td><td>1</td></tr>
<tr><td>Responder 3</td><td>1</td><td>0</td><td>1</td></tr></table>

In [12]:

# Debug inspection: show sort_dict as left in the kernel by the swaps loop
# (presumably from its last iteration — confirm against the run order)
sort_dict

{'still': [776077, 911486, 1062509], 'waver': [809173, 839663, 887011, 937908, 1014710], 'walker': [880111, 882317, 902815]}

In [13]:

# Debug inspection: show unsort_dict, the element -> key inversion of sort_dict
# built in the swaps loop (not defined in this cell)
unsort_dict

{776077: 'still', 911486: 'still', 1062509: 'still', 809173: 'waver', 839663: 'waver', 887011: 'waver', 937908: 'waver', 1014710: 'waver', 880111: 'walker', 882317: 'walker', 902815: 'walker'}

In [14]:

# Debug inspection: map each element of the ideal and the actual sequence through unsort_dict
# (all three names are leftovers from the swaps loop cell, not defined here)
print([unsort_dict[i] for i in ideal_sequence], [unsort_dict[a] for a in actual_sequence])

['still', 'still', 'still', 'waver', 'waver', 'waver', 'waver', 'waver', 'walker', 'walker', 'walker'] ['still', 'waver', 'waver', 'walker', 'walker', 'waver', 'walker', 'still', 'waver', 'waver', 'still']


In [16]:

# Debug inspection: IPython `??` shows the source of nu.count_swaps_to_perfect_order
nu.count_swaps_to_perfect_order??

[0;31mSignature:[0m [0mnu[0m[0;34m.[0m[0mcount_swaps_to_perfect_order[0m[0;34m([0m[0mideal_list[0m[0;34m,[0m [0mcompared_list[0m[0;34m,[0m [0mverbose[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m   
    [0;34m@[0m[0mstaticmethod[0m[0;34m[0m
[0;34m[0m    [0;32mdef[0m [0mcount_swaps_to_perfect_order[0m[0;34m([0m[0mideal_list[0m[0;34m,[0m [0mcompared_list[0m[0;34m,[0m [0mverbose[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m:[0m[0;34m[0m
[0;34m[0m        [0;34m"""[0m
[0;34m        Counts the number of swaps required to make compared_list identical to ideal_list[0m
[0;34m        without penalizing lists with repeated elements.[0m
[0;34m        [0m
[0;34m        Parameters:[0m
[0;34m            ideal_list (list): The list representing the ideal order.[0m
[0;34m            compared_list (list): The list to be compared and modified.[0m
[0;34m        [0m
[0;34m        Returns:[0m
[0