In [1]:

# Set up notebook
# NOTE(review): %pprint is an IPython toggle, so running this cell a second time in the same kernel
# would switch pretty printing back on — confirm that is acceptable
%pprint
import sys
# Put ../py at position 1 of the module search path (only once) so modules stored there can be imported
if ('../py' not in sys.path): sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

# load libraries
from FRVRS import fu, nu
from datetime import date, timedelta
from pandas import DataFrame, to_datetime, Series
import numpy as np
import os
import os.path as osp
from IPython.display import HTML
import pandas as pd


# Develop the Correct SORT Order Metric for Metrics Evaluation Open World

In [3]:

# Request the three Metrics Evaluation Open World data frames by name in a single call
frame_names_list = [
    'metrics_evaluation_open_world_df', 'metrics_evaluation_open_world_file_stats_df', 'metrics_evaluation_open_world_scene_stats_df'
]
data_frames_list = nu.load_data_frames(**{frame_name: '' for frame_name in frame_names_list})

# Unpack the results in the same order as the names above
logs_df, file_stats_df, scene_stats_df = [data_frames_list[frame_name] for frame_name in frame_names_list]

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_file_stats_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_scene_stats_df.pkl.


In [8]:

def patient_count_filter_fn(scene_df):
    """Return True when the scene data frame holds exactly 11 unique patient_ids."""
    return scene_df.patient_id.nunique() == 11


# Build the elevens data frame, handing the predicate above to fu.get_elevens_data_frame
elevens_df = fu.get_elevens_data_frame(
    logs_df,
    file_stats_df,
    scene_stats_df,
    needed_columns=['scene_type', 'is_scene_aborted', 'is_a_one_triage_file', 'responder_category'],
    patient_count_filter_fn=patient_count_filter_fn,
)


# Get First11 Dataset

In [6]:

# Get the optimal order data frame (from the pickle when it exists, otherwise parsed from the spreadsheet)
if nu.pickle_exists('first11_optimal_order_df'): first11_optimal_order_df = nu.load_object('first11_optimal_order_df')
else:
    file_path = '../data/xlsx/First11_Summary_Sheet_Optimal_Order_Groups_with_names.xlsx'
    first11_optimal_order_df = pd.read_excel(file_path)

    # The column names are taken from the non-null cells of the first two rows (minus the first one found)
    columns_list = first11_optimal_order_df.iloc[:2].stack().dropna().tolist()
    first11_optimal_order_df = first11_optimal_order_df.loc[2:].dropna(axis='columns', how='all')
    first11_optimal_order_df.columns = columns_list[1:]

    # A new record starts on every row that has a Names value and runs until the row before the next one
    mask_series = ~first11_optimal_order_df.Names.isnull()
    idx_list = first11_optimal_order_df[mask_series].index.tolist()

    # Pair each starting row with the next starting row; the None sentinel marks the last record, which
    # runs to the end of the sheet. Handling it inside the loop means zero or one Names row no longer
    # fails on an undefined "stop", and the collapsing logic is written only once.
    rows_list = []
    for start, stop in zip(idx_list, idx_list[1:] + [None]):
        if stop is None: df = first11_optimal_order_df.loc[start:]
        else: df = first11_optimal_order_df.loc[start: stop-1]

        # Injuries and Vitals span several rows per record, so join them into one slash-separated string each
        injuries_list = df.Injuries.dropna().to_list()
        vitals_list = df.Vitals.dropna().to_list()

        # Every other column is read from the record's first row that is not entirely empty
        row_dict = list(df.drop(columns=['Injuries', 'Vitals']).dropna(axis='index', how='all').T.to_dict().values())[0]
        row_dict['Injuries'] = '/'.join(injuries_list)
        row_dict['Vitals'] = '/'.join(vitals_list)
        rows_list.append(row_dict)
    first11_optimal_order_df = DataFrame(rows_list)
    print(first11_optimal_order_df.shape)

    # Save so you don't have to run it again
    nu.store_objects(first11_optimal_order_df=first11_optimal_order_df)
    nu.save_data_frames(first11_optimal_order_df=first11_optimal_order_df)

display(first11_optimal_order_df)

(11, 7)
Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/first11_optimal_order_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/first11_optimal_order_df.csv


Unnamed: 0,SALT,Priority,Life Saving Intervention,Group,Names,Injuries,Vitals
0,Immediate,Still/Life Threat,Needle Decompression,1,Gary_3,Collapsed Chest,Pulse fast/Resp. labored/Responds
1,Immediate,Still/Life Threat,Tourniquet,1,Lily_2,Shin Amputation,Pulse Fast/Resp. Fast/Responds/Waves
2,Dead,Still/Life Threat,,1,Bob_0,Face Shrapnel/Collapsed Chest/Stomach Puncture...,Pulse absent/Resp. Absent/Responds No
3,Expectant,Still/Life Threat,,1,Gary_1,Face Shrapnel,Pulse faint/Resp. none/Responds No
4,Immediate,Still/Life Threat,Wound Packing,1,Mike_5,Side Puncture/Thigh Laceration,Pulse Fast/Resp. Fast/Responds/Waves
5,Immediate,Still/Life Threat,Wound Packing,1,Lily_4,Side Puncture,Pulse fast/Resp. Normal/Waves
6,Delayed,Still/Life Threat,Tourniquet,2,Mike_7,Thigh Puncture/Calf Laceration,Pulse Fast/Resp. Normal/Responds/Waves
7,Delayed,Wave,None (gauze optional),2,Gloria_6,Calf Shrapnel,Pulse Normal/Resp. Normal/Responds/Waves
8,Minimal,Walk,,3,Bob_9,Ear Bleed,Pulse normal/Resp. Normal/Responds?/Walks/Waves
9,Minimal,Walk,None (gauze optional),3,Gloria_8,Forearm Laceration,Pulse normal/Resp. Normal/Responds/Walks/Waves


In [7]:

# Collect the distinct, non-null spreadsheet names (with any " Root" text removed) in sorted order
first11_patients_list = sorted({
    str(patient_name).replace(' Root', '') for patient_name in first11_optimal_order_df.Names.dropna()
})
first11_patients_list

['Bob_0', 'Bob_9', 'Gary_1', 'Gary_3', 'Gloria_6', 'Gloria_8', 'Helga_10', 'Lily_2', 'Lily_4', 'Mike_5', 'Mike_7']

In [9]:

# Collect the distinct, non-null patient_ids of the elevens data frame (with any " Root" text removed) in sorted order
all_patients_list = sorted({
    str(patient_id).replace(' Root', '') for patient_id in elevens_df.patient_id.dropna()
})
all_patients_list

['Civilian 1', 'Civilian 2', 'NPC', 'Navy Soldier 1 Male', 'Navy Soldier 2 Male', 'Navy Soldier 3 Male', 'Navy Soldier 4 Female', 'Open World Marine 1 Male', 'Open World Marine 2 Female', 'Open World Marine 3 Male', 'Open World Marine 4 Male', 'Patient U', 'Patient V', 'Patient W', 'Patient X', 'Simulation', 'bystander', 'electrician', 'patient U', 'patient V', 'patient W', 'patient X']

In [11]:

# Find the patient_ids present in the logs but absent from the spreadsheet, so they have no Group designation
nongrouped_patients_list = sorted(set(all_patients_list) - set(first11_patients_list))

# Report them as a single readable sentence
missing_patients_str = nu.conjunctify_nouns(nongrouped_patients_list)
print(
    f'There are patient_ids in the Metrics Evaluation Open World missing from the guide ({missing_patients_str})'
    ' that need to be given a Group designation in order to compute the Correct SORT Order metric.'
)

There are patient_ids in the First Responder Master Registry missing from the guide (Bob_10, Bob_4, Gary_5, Gary_9, Gloria_2, Helga_0, Lily_1, Lily_7, Mike, Mike_0, Mike_1, Mike_2, Mike_3, Mike_4, Mike_6, and Mike_9) that need to be given a Group designation in order to compute the Correct SORT Order metric.


In [18]:

# Count, for every log file, how many times each nongrouped patient shows up as ",<patient> Root,"
import re

folder_path = '../data/logs/Metrics Evaluation Open World 2.26.2026'

# Compile one pattern per patient up front instead of once per file; re.escape makes sure any
# regex metacharacter inside a patient_id is matched literally
files_regex_dict = {
    patient_prefix: re.compile(f",{re.escape(patient_prefix)} Root,") for patient_prefix in nongrouped_patients_list
}

rows_list = []
for file_name in os.listdir(path=folder_path):
    if not file_name.endswith('.csv'): continue

    # Read the whole file once, then run every patient pattern against the same text
    file_path = osp.join(folder_path, file_name)
    with open(file_path, 'r', encoding=nu.encoding_type) as f: text = f.read()
    for patient_prefix, files_regex in files_regex_dict.items():
        results_count = len(files_regex.findall(text))

        # Only record the file/patient pairs that actually occur
        if results_count:
            row_dict = {'file_name': file_name, 'patient_prefix': patient_prefix, 'results_count': results_count}
            rows_list.append(row_dict)

# Name the columns explicitly so the data frame keeps its columns even when nothing matched
nongrouped_patients_df = DataFrame(rows_list, columns=['file_name', 'patient_prefix', 'results_count'])

In [30]:

# Match each patient, hardest-to-find first, with the most-populated file that contains that patient

# Files ordered from the most rows in nongrouped_patients_df to the fewest
filenames_list = nongrouped_patients_df.groupby('file_name').size().sort_values(ascending=False).index.tolist()

# Patients ordered from the fewest rows in nongrouped_patients_df to the most
patient_prefixes = nongrouped_patients_df.groupby('patient_prefix').size().sort_values().index

filename_set = set()
for patient_prefix in patient_prefixes:
    is_patient_row = (nongrouped_patients_df.patient_prefix == patient_prefix)
    patient_files = set(nongrouped_patients_df.loc[is_patient_row, 'file_name'])

    # Take the first file in popularity order that has this patient
    best_file_name = next((file_name for file_name in filenames_list if file_name in patient_files), None)
    if best_file_name is not None: filename_set.add(best_file_name)
filename_set

{'23.03.14.1252.csv', '23.05.31.1049.csv', '22.12.07.0833.csv', '22.11.30.0813.csv'}

In [32]:

# Print the chosen file names as space-separated, double-quoted strings
quoted_names = '" "'.join(filename_set)
print(f'"{quoted_names}"')

"23.03.14.1252.csv" "23.05.31.1049.csv" "22.12.07.0833.csv" "22.11.30.0813.csv"


In [81]:

# Conform the Metrics Evaluation Open World to the spreadsheet
mask_series = ~elevens_df.patient_id.isnull()

# Keep only the rows that have a patient_id and strip " Root" from it. assign() returns a new data frame,
# so nothing is written into a slice of elevens_df (the old attribute assignment was a chained assignment)
elevens_patients_df = elevens_df[mask_series].assign(
    patient_id=lambda df: df.patient_id.map(lambda x: str(x).replace(' Root', ''))
)

In [None]:

# Get the columns that consistently have only one value in them per patient, and alongside that
# the columns that are single-valued for at least one spreadsheet patient
single_value_cols_set = set(elevens_patients_df.columns)
first11_single_value_cols = set()
for patient_id, patient_df in elevens_patients_df.groupby('patient_id'):
    single_value_cols = {col for col in patient_df.columns if patient_df[col].nunique() == 1}

    # Columns must be single-valued for every patient to stay in the consistent set
    single_value_cols_set &= single_value_cols

    # Spreadsheet patients also contribute to the running union
    if patient_id in first11_patients_list: first11_single_value_cols |= single_value_cols

# What is single-valued for some spreadsheet patient but not for every patient
union_set = first11_single_value_cols - single_value_cols_set
print(sorted(union_set))

In [None]:

# Show each spreadsheet patient's logged attributes and injuries next to the spreadsheet row, for visual comparison
patient_columns_list = ['patient_id', 'patient_breath', 'patient_hearing', 'patient_mood', 'patient_pose', 'patient_pulse', 'patient_salt', 'patient_sort']

# Other candidate columns: 'injury_body_region', 'injury_required_procedure', 'injury_severity', 'pulse_taken_pulse_name', 'tool_type'
columns_list = patient_columns_list + ['injury_id']

# Restrict to the spreadsheet patients and order the rows by the patient columns before grouping
mask_series = elevens_patients_df.patient_id.isin(first11_patients_list)
first11_rows_df = elevens_patients_df.loc[mask_series, columns_list].sort_values(patient_columns_list)
for patient_id, patient_df in first11_rows_df.groupby('patient_id'):

    # Join the patient's distinct injury_ids into a slash-separated string
    injuries_str = '/'.join(patient_df.injury_id.dropna().unique())

    # Keep the distinct attribute combinations, dropping rows where every attribute is missing
    attributes_df = (
        patient_df[patient_columns_list]
        .drop_duplicates(subset=patient_columns_list)
        .dropna(subset=patient_columns_list[1:], how='all')
        .assign(patient_injuries=injuries_str)
    )
    display(attributes_df.merge(first11_optimal_order_df, left_on='patient_id', right_on='Names').T)

In [97]:

# Find the most popular patient_id list not in the spreadsheet
patients_list = [f'{patient_name} Root' for patient_name in first11_patients_list]
print(patients_list)
bads_list = []
for (session_uuid, scene_id), scene_df in elevens_df.groupby(fu.scene_groupby_columns):
    scene_patient_ids = scene_df.patient_id.dropna()

    # A scene is "bad" when at least one of its patient_ids is not a spreadsheet patient;
    # record its full sorted patient_id list as a string so identical lists can be counted
    if not scene_patient_ids.isin(patients_list).all():
        bads_list.append(str(sorted(scene_patient_ids.unique())))
Series(bads_list).value_counts()

['Bob_0 Root', 'Bob_9 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root', 'Lily_4 Root', 'Mike_5 Root', 'Mike_7 Root']


['Bob_0 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gary_9 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root', 'Lily_4 Root', 'Mike_5 Root', 'Mike_7 Root']                                  232
['Bob_10 Root', 'Gary_5 Root', 'Gloria_2 Root', 'Gloria_8 Root', 'Helga_0 Root', 'Lily_1 Root', 'Lily_7 Root', 'Mike_3 Root', 'Mike_4 Root', 'Mike_6 Root', 'Mike_9 Root']                                   33
['Bob_0 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gary_9 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root', 'Lily_4 Root', 'Mike Root', 'Mike_5 Root', 'Mike_7 Root']                       7
['Bob_10 Root', 'Bob_4 Root', 'Gary_5 Root', 'Gloria_2 Root', 'Gloria_8 Root', 'Helga_0 Root', 'Lily_1 Root', 'Lily_7 Root', 'Mike_3 Root', 'Mike_6 Root', 'Mike_9 Root']                                     5
['Bob_0 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gary_9 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root', 'Lily_4 Root', 'Mike_2 Root', 'Mike_5 Root

In [95]:

# Count the scenes that have at least one patient_id outside the spreadsheet's patient_id list
sum(
    not scene_df.patient_id.dropna().isin(patients_list).all()
    for _, scene_df in elevens_df.groupby(fu.scene_groupby_columns)
)

285

In [93]:

# List the distinct patient_ids of the current scene_df.
# NOTE(review): scene_df is not defined in this cell; it relies on an earlier cell having left it bound
# (e.g. as the loop variable of a scene groupby) — confirm this still holds on Restart & Run All
sorted(scene_df.patient_id.dropna().unique())

['Bob_0 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gary_9 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root', 'Lily_4 Root', 'Mike_5 Root', 'Mike_7 Root']

In [100]:

# Turn the patient_id list into a readable phrase that can be pasted into a discussion
discussion_patients_list = [
    'Bob_0 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gary_9 Root',
    'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root',
    'Lily_2 Root', 'Lily_4 Root', 'Mike_5 Root', 'Mike_7 Root',
]
nu.conjunctify_nouns(discussion_patients_list)

'Bob_0 Root, Gary_1 Root, Gary_3 Root, Gary_9 Root, Gloria_6 Root, Gloria_8 Root, Helga_10 Root, Lily_2 Root, Lily_4 Root, Mike_5 Root, and Mike_7 Root'

In [None]:

def get_actual_and_ideal_sequences(scene_df, verbose=False):
    """
    Build the ideal and actual orderings of the patients' first interactions in a scene.

    Parameters:
        scene_df (pandas.DataFrame): Scene rows; must have 'patient_sort' and 'patient_id' columns, plus
            whatever fu.get_first_patient_interaction needs.
        verbose (bool, optional): When True, display each SORT category and its unfiltered list of
            first interactions. Defaults to False.

    Returns:
        tuple: A tuple of three elements:
            actual_sequence (pandas.Series): The ideal sequence re-sorted in ascending order of value
                (its index keeps the positions from the ideal sequence).
            ideal_sequence (pandas.Series): The first interactions concatenated category by category, in
                fu.patient_sort_order order, each category's values sorted ascending.
            sort_dict (dict): Maps each SORT category found in the scene to its sorted list of first interactions.

    Notes:
        SORT categories that are not in `fu.patient_sort_order` are skipped.
        A patient's first interaction is computed from all of that patient's rows in scene_df, not only
        the rows carrying the SORT category.
        First interactions that come back as None are left out of the lists.
    """
    sort_dict = {}
    for sort, patient_sort_df in scene_df.groupby('patient_sort'):

        # Skip any SORT category that has no place in the ideal ordering
        if sort not in fu.patient_sort_order:
            continue

        # One first interaction per distinct patient in this category
        action_list = [
            fu.get_first_patient_interaction(scene_df[scene_df.patient_id == patient_id])
            for patient_id in patient_sort_df.patient_id.unique()
        ]
        if verbose: display(sort, action_list)

        # Drop the patients without an interaction and order the rest
        sort_dict[sort] = sorted(filter(lambda action: action is not None, action_list))

    # Chain the categories together in their prescribed order to get the ideal sequence
    ideal_sequence = Series(data=[
        action for sort in fu.patient_sort_order for action in sort_dict.get(sort, [])
    ])

    # The actual sequence is the same set of values in ascending order
    actual_sequence = ideal_sequence.sort_values(ascending=True)

    return actual_sequence, ideal_sequence, sort_dict


# Maintenance


## Get Testing Dataset

In [None]:

# Print a sample scene data frame literal that can be pasted into a unit test
columns_list = ['patient_sort', 'patient_id', 'action_type', 'action_tick']
sample_size = 6

# Start from an empty frame so df exists even when elevens_df has no scenes at all
df = DataFrame([], columns=columns_list)
for (session_uuid, scene_id), scene_df in elevens_df.groupby(fu.scene_groupby_columns):
    mask_series = ~scene_df.patient_sort.isnull() & scene_df.action_type.isin(fu.responder_negotiations_list)

    # Collect each patient's lowest-action_tick matching row and concatenate once; this avoids re-copying
    # a growing frame for every patient and keeps the empty placeholder frame out of the concat
    first_rows_list = [
        patient_df[columns_list].sort_values('action_tick').head(1)
        for patient_id, patient_df in scene_df[mask_series].groupby('patient_id')
    ]
    if first_rows_list: df = pd.concat(first_rows_list, axis='index')
    else: df = DataFrame([], columns=columns_list)

    # Stop at the first scene that yields more rows than the sample size
    if (df.shape[0] > sample_size): break
print("""
        # Sample scene dataframe
        self.scene_df = pd.DataFrame({""")

# Never ask for more rows than exist (no scene may have qualified), and seed the sample so a
# Restart & Run All prints the same fixture every time
sample_df = df.sample(min(sample_size, df.shape[0]), random_state=0)
for k, v in sample_df.to_dict(orient='list').items(): print(f"            '{k}': {v},")
print('        })')

In [None]:

# Sample scene dataframe
scene_df = pd.DataFrame(dict(
    patient_sort=['waver', 'waver', 'still', 'waver', 'waver', 'walker'],
    patient_id=['Gloria_6 Root', 'Lily_2 Root', 'Gary_3 Root', 'Mike_5 Root', 'Lily_4 Root', 'Gloria_8 Root'],
    action_type=['PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED'],
    action_tick=[384722, 409276, 336847, 438270, 607365, 346066],
))

# This calls the fu module's get_actual_and_ideal_sequences, not the function of the same name defined in this notebook
expected_actual_sequence, expected_ideal_sequence, expected_sort_dict = fu.get_actual_and_ideal_sequences(scene_df)

# Render the two sequences as list literals, removing ".0" and spelling missing values as np.nan
actual_literal = str(expected_actual_sequence.tolist()).replace('.0', '').replace('nan', 'np.nan')
ideal_literal = str(expected_ideal_sequence.tolist()).replace('.0', '').replace('nan', 'np.nan')
print(f"""
        # Expected results
        expected_actual_sequence = pd.Series(data={actual_literal})
        expected_ideal_sequence = pd.Series(data={ideal_literal})
        expected_sort_dict = {expected_sort_dict}""")

In [7]:

# List, in alphabetical order, every elevens_df column whose name contains "sort"
sorted(column_name for column_name in elevens_df.columns if 'sort' in column_name)

['patient_demoted_sort', 'patient_engaged_sort', 'patient_record_sort', 'patient_sort', 's_a_l_t_walk_if_can_sort_command_text', 's_a_l_t_walk_if_can_sort_location', 's_a_l_t_walked_sort_command_text', 's_a_l_t_walked_sort_location', 's_a_l_t_wave_if_can_sort_command_text', 's_a_l_t_wave_if_can_sort_location', 's_a_l_t_waved_sort_command_text', 's_a_l_t_waved_sort_location']