In [1]:

# Set up notebook
%pprint
import sys
if (osp.join(os.pardir, 'py') not in sys.path): sys.path.insert(1, osp.join(os.pardir, 'py'))

Pretty printing has been turned OFF


In [2]:

# load libraries
from FRVRS import (nu, fu, DataFrame, to_datetime, Series, np, osp, re, listdir, display, read_excel, concat)
from datetime import date, timedelta
import os
from IPython.display import HTML
import pandas as pd


# Develop the Correct SORT Order Metric for Metrics Evaluation Open World

In [3]:

# load data frames
data_frames_dict = nu.load_data_frames(
    metrics_evaluation_open_world_csv_stats_df='', metrics_evaluation_open_world_json_stats_df='',
    metrics_evaluation_open_world_scene_stats_df=''
)
logs_df = data_frames_dict['metrics_evaluation_open_world_csv_stats_df']
json_stats_df = data_frames_dict['metrics_evaluation_open_world_json_stats_df']
scene_stats_df = data_frames_dict['metrics_evaluation_open_world_scene_stats_df']

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_csv_stats_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_json_stats_df.pkl.
Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/metrics_evaluation_open_world_scene_stats_df.pkl.


In [4]:

filter_fn = (lambda scene_df: True) # scene_df.patient_id.nunique() == 11
elevens_df = fu.get_elevens_dataframe(
    logs_df, json_stats_df, scene_stats_df, needed_columns=['scene_type', 'is_scene_aborted', 'is_a_one_triage_file', 'responder_category'],
    patient_count_filter_fn=filter_fn
)


# Get First11 Dataset

In [10]:

# Get the list of patient_ids that are missing a group designation
print(
    f'There are patient_ids in the Metrics Evaluation Open World missing from the guide ({nu.conjunctify_nouns(nongrouped_patients_list)})'
    ' that need to be given a Group designation in order to compute the Correct SORT Order metric.'
)

There are patient_ids in the Metrics Evaluation Open World missing from the guide (Adept Shooter, Adept Victim, Civilian 1, Civilian 1 Female, Civilian 2, Local Soldier 1, Marine 1 Male, Marine 2 Male, Marine 3 Male, Marine 4 Male, NPC, NPC 1, NPC 2, NPC 3, NPC 4, Navy Soldier 1 Male, Navy Soldier 2 Male, Navy Soldier 3 Male, Navy Soldier 4 Female, Open World Civilian 1 Male, Open World Civilian 2 Female, Open World Marine 1 Female, Open World Marine 1 Male, Open World Marine 2 Female, Open World Marine 2 Male, Open World Marine 3 Male, Open World Marine 4 Male, Patient U, Patient V, Patient W, Patient X, Simulation, US Soldier 1, bystander, electrician, patient U, patient V, patient W, and patient X) that need to be given a Group designation in order to compute the Correct SORT Order metric.


In [13]:

# Get a set and count of files that have each patient
folder_path = '../data/logs/Metrics Evaluation Open World'
rows_list = []
for file_name in listdir(path=folder_path):
    if file_name.endswith('.csv'):
        file_path = osp.join(folder_path, file_name)
        with open(file_path, 'r', encoding=nu.encoding_type) as f:
            text = f.read()
            for patient_prefix in nongrouped_patients_list:
                files_regex = re.compile(f",{patient_prefix} Root,")
                patients_list = files_regex.findall(text)
                if patients_list:
                    row_dict = {'file_name': file_name, 'patient_prefix': patient_prefix, 'results_count': len(patients_list)}
                    rows_list.append(row_dict)
nongrouped_patients_df = DataFrame(rows_list)
print(nongrouped_patients_df.shape)

(0, 0)


In [14]:

# Match the next-most-hardest-to-find patient with the next-most-populated file
filename_set = set()
if nongrouped_patients_df.shape[0]:
    filenames_list = nongrouped_patients_df.groupby('file_name').size().sort_values(ascending=False).index.tolist()
    for patient_prefix in nongrouped_patients_df.groupby('patient_prefix').size().sort_values().index:
        base_mask_series = (nongrouped_patients_df.patient_prefix == patient_prefix)
        for file_name in filenames_list:
            mask_series = base_mask_series & (nongrouped_patients_df.file_name == file_name)
            if mask_series.any():
                filename_set.add(file_name)
                break
filename_set

set()

In [15]:

print('"' + '" "'.join(list(filename_set)) + '"')

""


In [17]:

# Conform the Metrics Evaluation Open World to the spreadsheet
mask_series = ~elevens_df.patient_id.isnull()
elevens_patients_df = elevens_df[mask_series]
elevens_patients_df.patient_id = elevens_patients_df.patient_id.map(lambda x: str(x).replace(' Root', ''))

In [18]:

# Get the columns that consistently have only one value in them per patient
single_value_cols_set = set(elevens_patients_df.columns)
for patient_id, patient_df in elevens_patients_df.groupby('patient_id'):
    single_value_cols = set([col for col in patient_df.columns if patient_df[col].nunique() == 1])
    single_value_cols_set = single_value_cols_set.intersection(single_value_cols)
# print(single_value_cols_set)
union_set = set()
for patient_id, patient_df in elevens_patients_df.groupby('patient_id'):
    if patient_id in first11_patients_list:
        # print(patient_id, patient_df.shape[0])
        single_value_cols = set([col for col in patient_df.columns if patient_df[col].nunique() == 1])
        union_set = union_set.union(single_value_cols.difference(single_value_cols_set))
# display(elevens_patients_df[union_set])
print(sorted(union_set))

[]


In [19]:

# Verify that the patient_id's injuries and attributes are consistent with the spreadsheet
patient_columns_list = [
    'patient_id', 'patient_breath', 'patient_hearing', 'patient_mood', 'patient_pose', 'patient_pulse', 'patient_salt', 'patient_sort'
]
columns_list = patient_columns_list + ['injury_id']
#, 'injury_body_region', 'injury_required_procedure', 'injury_severity', 'pulse_taken_pulse_name', 'tool_type']
mask_series = elevens_patients_df.patient_id.isin(first11_patients_list)
for patient_id, patient_df in elevens_patients_df[mask_series][columns_list].sort_values(patient_columns_list).groupby('patient_id'):
    injuries_list = patient_df.injury_id.dropna().unique()
    patient_df = patient_df[patient_columns_list].drop_duplicates(subset=patient_columns_list).dropna(
        subset=patient_columns_list[1:], how='all'
    )
    patient_df['patient_injuries'] = '/'.join(injuries_list)
    display(patient_df.merge(first11_optimal_order_df, left_on='patient_id', right_on='Patient').T)

In [20]:

# Find the most popular patient_id list not in the spreadsheet
patients_list = [x + ' Root' for x in first11_patients_list]
print(patients_list)
bads_list = []
for (session_uuid, scene_id), scene_df in elevens_df.groupby(fu.scene_groupby_columns):
    mask_series = ~scene_df.patient_id.isnull()
    patients_df = scene_df[mask_series]
    mask_series = patients_df.patient_id.isin(patients_list)
    if not mask_series.all():
        mask_series = ~scene_df.patient_id.isnull()
        not_first11_list = sorted(scene_df[mask_series].patient_id.unique())
        bads_list.append(str(not_first11_list))
Series(bads_list).value_counts()

['Bob_0 Root', 'Bob_9 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root', 'Lily_4 Root', 'Mike_5 Root', 'Mike_7 Root']


['Civilian 1 Root', 'Civilian 2 Root', 'NPC Root', 'Open World Marine 1 Male Root', 'Open World Marine 2 Female Root', 'Open World Marine 3 Male Root', 'Open World Marine 4 Male Root', 'Patient U Root', 'Patient V Root', 'Patient W Root', 'Patient X Root', 'Simulation Root']                                                             7
['Navy Soldier 1 Male Root', 'Navy Soldier 2 Male Root', 'Navy Soldier 3 Male Root', 'Navy Soldier 4 Female Root', 'Simulation Root', 'bystander Root', 'electrician Root', 'patient U Root', 'patient V Root', 'patient W Root', 'patient X Root']                                                                                             6
['Adept Shooter Root', 'Adept Victim Root', 'Civilian 1 Female Root', 'Marine 1 Male Root', 'Marine 2 Male Root', 'Marine 3 Male Root', 'Marine 4 Male Root', 'NPC 1 Root', 'NPC 2 Root', 'NPC 3 Root', 'NPC 4 Root', 'Patient U Root', 'Patient V Root', 'Patient W Root', 'Patient X Root', 'Simulation Root']                    

In [21]:

# Count the scenes that don't contain the spreadsheet's patient_id list
sum([not scene_df[~scene_df.patient_id.isnull()].patient_id.isin(patients_list).all() for _, scene_df in elevens_df.groupby(
    fu.scene_groupby_columns
)])

42

In [22]:

mask_series = ~scene_df.patient_id.isnull()
sorted(scene_df[mask_series].patient_id.unique())

['Adept Shooter Root', 'Adept Victim Root', 'Civilian 1 Female Root', 'Marine 1 Male Root', 'Marine 2 Male Root', 'Marine 3 Male Root', 'Marine 4 Male Root', 'NPC 1 Root', 'NPC 2 Root', 'NPC 3 Root', 'NPC 4 Root', 'Patient U Root', 'Patient V Root', 'Patient W Root', 'Patient X Root', 'Simulation Root']

In [23]:

# Paste that list into a discussion
nu.conjunctify_nouns([
    'Bob_0 Root', 'Gary_1 Root', 'Gary_3 Root', 'Gary_9 Root', 'Gloria_6 Root', 'Gloria_8 Root', 'Helga_10 Root', 'Lily_2 Root',
    'Lily_4 Root', 'Mike_5 Root', 'Mike_7 Root'
])

'Bob_0 Root, Gary_1 Root, Gary_3 Root, Gary_9 Root, Gloria_6 Root, Gloria_8 Root, Helga_10 Root, Lily_2 Root, Lily_4 Root, Mike_5 Root, and Mike_7 Root'


## Get the Swap Counts

In [8]:

# Get the list of all patients in the Metrics Evaluation Open World
mask_series = ~elevens_df.patient_id.isnull()
all_patients_list = sorted(elevens_df[mask_series].patient_id.map(lambda x: str(x).replace(' Root', '')).unique())
all_patients_list

['Adept Shooter', 'Adept Victim', 'Civilian 1', 'Civilian 1 Female', 'Civilian 2', 'Local Soldier 1', 'Marine 1 Male', 'Marine 2 Male', 'Marine 3 Male', 'Marine 4 Male', 'NPC', 'NPC 1', 'NPC 2', 'NPC 3', 'NPC 4', 'Navy Soldier 1 Male', 'Navy Soldier 2 Male', 'Navy Soldier 3 Male', 'Navy Soldier 4 Female', 'Open World Civilian 1 Male', 'Open World Civilian 2 Female', 'Open World Marine 1 Female', 'Open World Marine 1 Male', 'Open World Marine 2 Female', 'Open World Marine 2 Male', 'Open World Marine 3 Male', 'Open World Marine 4 Male', 'Patient U', 'Patient V', 'Patient W', 'Patient X', 'Simulation', 'US Soldier 1', 'bystander', 'electrician', 'patient U', 'patient V', 'patient W', 'patient X']

In [11]:

# Get the optimal order data frame
if nu.pickle_exists('first11_optimal_order_df'): first11_optimal_order_df = nu.load_object('first11_optimal_order_df')
else:
    file_path = '../data/xlsx/First11_Summary_Sheet_Optimal_Order_Groups_with_names.xlsx'
    first11_optimal_order_df = read_excel(file_path)
    columns_list = first11_optimal_order_df.iloc[:2].stack().dropna().tolist()
    first11_optimal_order_df = first11_optimal_order_df.loc[2:].dropna(axis='columns', how='all')
    first11_optimal_order_df.columns = columns_list[1:]
    mask_series = ~first11_optimal_order_df.Patient.isnull()
    idx_list = first11_optimal_order_df[mask_series].index.tolist()
    rows_list = []
    for start, stop in zip(idx_list, idx_list[1:]):
        df = first11_optimal_order_df.loc[start: stop-1]
        injuries_list = df.Injuries.dropna().to_list()
        vitals_list = df.Vitals.dropna().to_list()
        row_dict = list(df.drop(columns=['Injuries', 'Vitals']).dropna(axis='index', how='all').T.to_dict().values())[0]
        row_dict['Injuries'] = '/'.join(injuries_list)
        row_dict['Vitals'] = '/'.join(vitals_list)
        rows_list.append(row_dict)
    df = first11_optimal_order_df.loc[stop:]
    injuries_list = df.Injuries.dropna().to_list()
    vitals_list = df.Vitals.dropna().to_list()
    row_dict = list(df.drop(columns=['Injuries', 'Vitals']).dropna(axis='index', how='all').T.to_dict().values())[0]
    row_dict['Injuries'] = '/'.join(injuries_list)
    row_dict['Vitals'] = '/'.join(vitals_list)
    rows_list.append(row_dict)
    first11_optimal_order_df = DataFrame(rows_list)
    print(first11_optimal_order_df.shape) # (11, 7)
    
    # Save so you don't have to run it again
    nu.store_objects(first11_optimal_order_df=first11_optimal_order_df)
    nu.save_data_frames(first11_optimal_order_df=first11_optimal_order_df)

display(first11_optimal_order_df.sample(5))

Unnamed: 0,Patient,Injuries,Vitals,SALT,Priority,Required Life Saving Intervention,Group
3,Gary_1 Root,Face Shrapnel,Pulse faint|Resp. none|Responds No,Expectant,Still/Life Threat,,1
6,Mike_7 Root,L Thigh Puncture|Calf Laceration,Pulse Fast|Resp. Normal|Responds|Waves,Delayed,Still/Life Threat,AppliedTourniquet|AppliedDressingGauze (Optional),1
2,Bob_0 Root,Face Shrapnel|Collapsed Chest|Stomach Puncture...,Pulse absent|Resp. Absent|Responds No,Dead,Still/Life Threat,,1
10,Helga_10 Root,Forehead Scrape,Pulse normal|Resp. Normal|Responds|Walks|Waves,Minimal,Walk,,3
5,Lily_4 Root,L Side Puncture,Pulse fast|Resp. Normal|Waves,Immediate,Still/Life Threat,AppliedPackingGauze,1


In [12]:

# Get the patients list from the spreadsheet
mask_series = ~first11_optimal_order_df.Patient.isnull()
first11_patients_list = sorted(first11_optimal_order_df[mask_series].Patient.map(lambda x: str(x).replace(' Root', '')).unique())
first11_patients_list

['Bob_0', 'Bob_9', 'Gary_1', 'Gary_3', 'Gary_9', 'Gloria_6', 'Gloria_8', 'Helga_10', 'Lily_2', 'Lily_4', 'Mike_5', 'Mike_7']

In [15]:

# Get priority group dictionary and set a column with in the merge dataset
mask_series = ~first11_optimal_order_df.Patient.isnull()
priority_group_dict = first11_optimal_order_df[mask_series].set_index('Patient').Group.to_dict()
print(priority_group_dict)
mask_series = elevens_df.patient_id.isin(first11_patients_list)
elevens_df.loc[mask_series, 'priority_group'] = elevens_df.loc[mask_series, 'patient_id'].map(priority_group_dict)

{'Gary_3 Root': 1, 'Lily_2 Root': 1, 'Bob_0 Root': 1, 'Gary_1 Root': 1, 'Mike_5 Root': 1, 'Lily_4 Root': 1, 'Mike_7 Root': 1, 'Gloria_6 Root': 2, 'Bob_9 Root': 3, 'Gloria_8 Root': 3, 'Helga_10 Root': 3, 'Gary_9 Root': 3}


In [16]:

# Get a dataset of only first11 patients
nongrouped_patients_list = sorted(set(all_patients_list).difference(set(first11_patients_list)))
mask_series = ~elevens_df.patient_id.isin(nongrouped_patients_list)
grouped_patients_df = elevens_df[mask_series]
print(grouped_patients_df.shape[0], grouped_patients_df.session_uuid.nunique(), grouped_patients_df.shape[1]) # 390559 331 116

71280 26 112


In [17]:

# Examine the priority group sequences
rows_list = []
for (session_uuid, scene_id), scene_df in grouped_patients_df.groupby(fu.scene_groupby_columns):
    row_dict = {cn: eval(cn) for cn in fu.scene_groupby_columns}
    actual_sequence, ideal_sequence, sort_dict = fu.get_actual_and_ideal_priority_group_sequences(scene_df, verbose=False)
    unsort_dict = {v1: k for k, v in sort_dict.items() for v1 in v}
    row_dict['swaps_to_perfect_order_count'] = nu.count_swaps_to_perfect_order(
        [unsort_dict[i] for i in ideal_sequence], [unsort_dict[a] for a in actual_sequence]
    )
    rows_list.append(row_dict)
ow_count_swaps_df = DataFrame(rows_list)
print(ow_count_swaps_df.shape) # (334, 3)

nu.store_objects(ow_count_swaps_df=ow_count_swaps_df)
nu.save_data_frames(ow_count_swaps_df=ow_count_swaps_df)

(27, 3)
Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/ow_count_swaps_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/ow_count_swaps_df.csv



# Maintenance


## Get Testing Dataset

In [None]:

# Get sample scene data frame for testing
columns_list = ['patient_sort', 'patient_id', 'action_type', 'action_tick']
for (session_uuid, scene_id), scene_df in elevens_df.groupby(fu.scene_groupby_columns):
    df = DataFrame([], columns=columns_list)
    mask_series = ~scene_df.patient_sort.isnull() & scene_df.action_type.isin(fu.responder_negotiations_list)
    for patient_id, patient_df in scene_df[mask_series].groupby('patient_id'):
        df = concat([df, patient_df[columns_list].sort_values('action_tick').head(1)], axis='index')
    if (df.shape[0] > 6): break
print("""
        # Sample scene dataframe
        self.scene_df = pd.DataFrame({""")
for k, v in dict(df.sample(6).to_dict(orient='list')).items(): print(f"            '{k}': {v},")
print('        })')

In [None]:

# Sample scene dataframe
scene_df = DataFrame({
    'patient_sort': ['waver', 'waver', 'still', 'waver', 'waver', 'walker'],
    'patient_id': ['Gloria_6 Root', 'Lily_2 Root', 'Gary_3 Root', 'Mike_5 Root', 'Lily_4 Root', 'Gloria_8 Root'],
    'action_type': ['PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED', 'PATIENT_ENGAGED'],
    'action_tick': [384722, 409276, 336847, 438270, 607365, 346066],
})

expected_actual_sequence, expected_ideal_sequence, expected_sort_dict = fu.get_actual_and_ideal_priority_group_sequences(scene_df)
print(f"""
        # Expected results
        expected_actual_sequence = pd.Series(data={str(expected_actual_sequence.tolist()).replace('.0', '').replace('nan', 'np.nan')})
        expected_ideal_sequence = pd.Series(data={str(expected_ideal_sequence.tolist()).replace('.0', '').replace('nan', 'np.nan')})
        expected_sort_dict = {expected_sort_dict}""")

In [7]:

sorted([cn for cn in elevens_df.columns if 'sort' in cn])

['patient_demoted_sort', 'patient_engaged_sort', 'patient_record_sort', 'patient_sort', 's_a_l_t_walk_if_can_sort_command_text', 's_a_l_t_walk_if_can_sort_location', 's_a_l_t_walked_sort_command_text', 's_a_l_t_walked_sort_location', 's_a_l_t_wave_if_can_sort_command_text', 's_a_l_t_wave_if_can_sort_location', 's_a_l_t_waved_sort_command_text', 's_a_l_t_waved_sort_location']