In [1]:

%pprint
import sys
sys.path.insert(1, '../py')

Pretty printing has been turned OFF


In [2]:

from FRVRS import nu, fu
from pandas import DataFrame
from itertools import product
from tqdm import tqdm_notebook as tqdm

In [3]:

# load data frames
data_frames_list = nu.load_data_frames(frvrs_logs_df='frvrs_logs_df')
frvrs_logs_df = data_frames_list['frvrs_logs_df']
print(frvrs_logs_df.shape) # (829116, 125)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl.
(829116, 125)



# Create a DataFrame of Scene Sequences

In [4]:

if nu.pickle_exists('scene_sequences_df'): scene_sequences_df = nu.load_object('scene_sequences_df')
else:
    rows_list = []
    engagment_columns_list = ['patient_id', 'engagement_start', 'location_tuple', 'patient_sort', 'predicted_priority', 'injury_severity']
    data_frames_list = nu.load_data_frames(scene_stats_df='scene_stats_df')
    scene_stats_df = data_frames_list['scene_stats_df']
    for (session_uuid, scene_id), idx_df in scene_stats_df.groupby(fu.scene_groupby_columns):
        
        # Get the whole scene history
        mask_series = True
        for cn in fu.scene_groupby_columns: mask_series &= (frvrs_logs_df[cn] == eval(cn))
        scene_df = frvrs_logs_df[mask_series]
        
        # Get the engagement sequence and the stats from that
        actual_engagement_order = fu.get_engagement_starts_order(scene_df, verbose=False)
        if actual_engagement_order:
            df = DataFrame(actual_engagement_order, columns=engagment_columns_list)
            row_dict = {cn: eval(cn) for cn in fu.scene_groupby_columns}
            patient_sorts_list = df.apply(lambda row_series: row_series.patient_sort, axis='columns').tolist()
            injury_severitys_list = df.apply(lambda row_series: row_series.injury_severity, axis='columns').tolist()
            for i, (patient_sort, injury_severity) in enumerate(zip(patient_sorts_list, injury_severitys_list)):
                row_dict[f'patient_sort{i:02d}'] = patient_sort
                row_dict[f'injury_severity{i:02d}'] = injury_severity
                row_dict[f'combo{i:02d}'] = str(patient_sort) + '-' + str(injury_severity)
            rows_list.append(row_dict)
    scene_sequences_df = DataFrame(rows_list)
    nu.store_objects(scene_sequences_df=scene_sequences_df)
    nu.save_data_frames(scene_sequences_df=scene_sequences_df)
print(scene_sequences_df.shape) # (696, 53)

(696, 53)


In [6]:

scene_sequences_df.sample(8).dropna(axis='columns', how='all').T

Unnamed: 0,388,49,584,497,346,314,377,107
session_uuid,87efcdf5-9090-4fa5-afac-25c778924e9a,0fa439d6-476c-49a5-b478-48e3454974e1,daca96a3-b61d-4087-9747-cea6fd7e3a43,b440a9ce-e511-43e3-bc20-c9b2bed11005,788af03e-aeee-4d53-8c97-f0060df61932,71197277-ba36-4a82-9ae0-0016e7756665,84f8db6a-1993-493f-b6cb-609ac3b45107,251a4532-ff1f-4182-91ce-4215786a339a
scene_id,1,1,0,0,1,1,0,0
patient_sort00,still,still,waver,still,still,still,walker,still
injury_severity00,high,high,medium,high,high,high,low,high
combo00,still-high,still-high,waver-medium,still-high,still-high,still-high,walker-low,still-high
patient_sort01,still,still,waver,waver,walker,still,waver,still
injury_severity01,high,high,high,medium,low,high,medium,high
combo01,still-high,still-high,waver-high,waver-medium,walker-low,still-high,waver-medium,still-high
patient_sort02,waver,still,still,walker,still,still,waver,waver
injury_severity02,medium,high,high,medium,high,high,high,medium


In [None]:

for cn in ['patient_sort', 'injury_severity', 'combo']:
    print(cn)
    sequence_items_list = []
    value_counts_series = scene_sequences_df[f'{cn}00'].value_counts()
    sequence_item = value_counts_series.index[0]
    sequence_items_list.append(sequence_item)
    mask_series = (scene_sequences_df[f'{cn}00'] == sequence_item)
    mask_series_dict = {0: mask_series}
    for i in range(1, 17):
        value_counts_series = scene_sequences_df[mask_series_dict[i-1]][f'{cn}{i:02d}'].value_counts()
        if value_counts_series.shape[0]:
            sequence_item = value_counts_series.index[0]
            sequence_items_list.append(sequence_item)
            mask_series = (scene_sequences_df[f'{cn}{i:02d}'] == sequence_item)
            mask_series_dict[i] = mask_series
        else: break
    print(sequence_items_list)
    for key, mask_series in mask_series_dict.items(): print(key, scene_sequences_df[mask_series].shape)
    display(scene_sequences_df[mask_series].dropna(axis='columns', how='all').T)
    df = scene_sequences_df[mask_series_dict[9]]
    display(df.sample(8).dropna(axis='columns', how='all').T)

In [None]:

# What are the worst examples of engagement sequences?
for cn, el in zip(['patient_sort', 'injury_severity', 'combo'], ['still', 'high', 'still-high']):
    for i in range(16, -1, -1):
        mask_series = (scene_sequences_df[f'{cn}{i:02d}'] == el)
        if mask_series.any(): break
    print(f'{cn}{i:02d}')
    df = scene_sequences_df[mask_series]
    print(df.shape)
    print([df[f'{cn}{i:02d}'].squeeze() for i in range(1, 17)])
    display(df.dropna(axis='columns', how='all').T)

In [5]:

# Get all patient SORT order seuqnces for 11-patient scenes
sequences = [seq for seq in product(fu.patient_sort_order, repeat=11) if len(seq) == 11]
print(len(sequences))

177147


In [6]:

# Discover the most popular sequence
max_count = -1
max_patient_sort_sequence = []
for seq in tqdm(sequences):
    mask_series = True
    for i, sort_category in enumerate(seq): mask_series &= (scene_sequences_df[f'patient_sort{i:02d}'] == sort_category)
    sequence_count = scene_sequences_df[mask_series].shape[0]
    if max_count < sequence_count:
        max_count = sequence_count
        max_patient_sort_sequence = seq

  0%|          | 0/177147 [00:00<?, ?it/s]

In [7]:

max_sequence_str = nu.conjunctify_nouns([f'"{seq}"' for seq in max_patient_sort_sequence])
print(
    f'The most popular patient engagement SORT sequence in the 11-patient scenes is {max_sequence_str} (used in {max_count} different scenes).'
    ' So the Right Ordering (still patients first, then waver patients, then walker patients) can still be thought of as a good measure.'
)

The most popular patient engagement SORT sequence in the 11-patient scenes is "still", "still", "still", "waver", "waver", "waver", "waver", "waver", "walker", "walker", and "walker" (used in 55 different scenes). So the Right Ordering (still patients first, then waver patients, then walker patients) can still be thought of as a good measure.


In [8]:

sequences = [seq for seq in product(fu.injury_severity_order, repeat=11) if len(seq) == 11]
print(len(sequences))

177147


In [9]:

max_count = -1
max_injury_severity_sequence = []
for seq in tqdm(sequences):
    mask_series = True
    for i, sort_category in enumerate(seq): mask_series &= (scene_sequences_df[f'injury_severity{i:02d}'] == sort_category)
    sequence_count = scene_sequences_df[mask_series].shape[0]
    if max_count < sequence_count:
        max_count = sequence_count
        max_injury_severity_sequence = seq

  0%|          | 0/177147 [00:00<?, ?it/s]

In [10]:

max_sequence_str = nu.conjunctify_nouns([f'"{seq}"' for seq in max_injury_severity_sequence])
print(
    f'The most popular patient engagement by injury severity sequence in the 11-patient scenes is {max_sequence_str} (used in {max_count} different scenes).'
)

The most popular patient engagement by injury severity sequence in the 11-patient scenes is "high", "high", "high", "medium", "high", "medium", "medium", "medium", "low", "low", and "low" (used in 26 different scenes).


In [12]:

# Get the combo alphabet list
# combos_alphabet = sorted(set([sequence_tuple[0] + '-' + sequence_tuple[1] for sequence_tuple in product(fu.patient_sort_order, fu.injury_severity_order)]))
combos_alphabet = sorted(set([str(patient_sort) + '-' + str(injury_severity) for patient_sort, injury_severity in zip(max_patient_sort_sequence, max_injury_severity_sequence)]))
combos_alphabet

['still-high', 'walker-low', 'waver-high', 'waver-medium']

In [13]:

sequences = [seq for seq in product(combos_alphabet, repeat=11) if len(seq) == 11]
print(len(sequences))

4194304


In [14]:

max_count = -1
max_combo_sequence = []
for seq in tqdm(sequences):
    mask_series = True
    for i, sort_category in enumerate(seq): mask_series &= (scene_sequences_df[f'combo{i:02d}'] == sort_category)
    sequence_count = scene_sequences_df[mask_series].shape[0]
    if max_count < sequence_count:
        max_count = sequence_count
        max_combo_sequence = seq

  0%|          | 0/4194304 [00:00<?, ?it/s]

In [15]:

max_sequence_str = nu.conjunctify_nouns([f'"{seq}"' for seq in max_combo_sequence])
print(
    f'The most popular patient engagement by SORT-severity sequence in the 11-patient scenes is {max_sequence_str} (used in {max_count} different scenes).'
)

The most popular patient engagement by SORT-severity sequence in the 11-patient scenes is "still-high", "still-high", "still-high", "waver-medium", "waver-high", "waver-medium", "waver-medium", "waver-medium", "walker-low", "walker-low", and "walker-low" (used in 26 different scenes).
