In [1]:

%pprint
import sys
if (osp.join('..', 'py') not in sys.path): sys.path.insert(1, osp.join('..', 'py'))

Pretty printing has been turned OFF


In [2]:

from FRVRS import (nu, fu, DataFrame)
from itertools import product
from tqdm import tqdm_notebook as tqdm

In [3]:

# Read the FRVRS logs data frame through the notebook utilities and report its dimensions
data_frames_dict = nu.load_data_frames(**{'frvrs_logs_df': 'frvrs_logs_df'})
frvrs_logs_df = data_frames_dict['frvrs_logs_df']
print(frvrs_logs_df.shape)  # recorded run: (829116, 125)

Attempting to load /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/frvrs_logs_df.pkl.
(829116, 125)



# Create a Data Frame of Scene Sequences

In [4]:

# Build one row per scene recording the order in which its patients were engaged,
# or reload that data frame from the pickle cache when it already exists
if nu.pickle_exists('scene_sequences_df'): scene_sequences_df = nu.load_object('scene_sequences_df')
else:
    rows_list = []
    engagement_columns_list = [
        'patient_id', 'engagement_start', 'location_tuple', 'patient_sort', 'predicted_priority', 'injury_severity'
    ]
    data_frames_dict = nu.load_data_frames(scene_stats_df='scene_stats_df')
    scene_stats_df = data_frames_dict['scene_stats_df']
    for group_key, idx_df in scene_stats_df.groupby(fu.scene_groupby_columns):
        
        # Pair every groupby column with this group's value, rather than eval-ing
        # loop-variable names that merely happen to match the column names
        if not isinstance(group_key, tuple): group_key = (group_key,)
        scene_key_dict = dict(zip(fu.scene_groupby_columns, group_key))
        
        # Get the whole scene history
        mask_series = True
        for cn, key_value in scene_key_dict.items(): mask_series &= (frvrs_logs_df[cn] == key_value)
        scene_df = frvrs_logs_df[mask_series]
        
        # Get the engagement sequence and the stats from that
        actual_engagement_order = fu.get_order_of_actual_engagement(scene_df, verbose=False)
        if actual_engagement_order:
            df = DataFrame(actual_engagement_order, columns=engagement_columns_list)
            row_dict = dict(scene_key_dict)
            
            # Spread the sequence over zero-padded, position-numbered columns (..00, ..01, ...)
            patient_sorts_list = df['patient_sort'].tolist()
            injury_severitys_list = df['injury_severity'].tolist()
            for i, (patient_sort, injury_severity) in enumerate(zip(patient_sorts_list, injury_severitys_list)):
                row_dict[f'patient_sort{i:02d}'] = patient_sort
                row_dict[f'injury_severity{i:02d}'] = injury_severity
                row_dict[f'combo{i:02d}'] = str(patient_sort) + '-' + str(injury_severity)
            rows_list.append(row_dict)
    scene_sequences_df = DataFrame(rows_list)
    nu.store_objects(scene_sequences_df=scene_sequences_df)
    nu.save_data_frames(scene_sequences_df=scene_sequences_df)

# The original note here said (696, 53); the recorded output is (696, 70), presumably because
# the cached frame already carries the columns that later cells add and re-save -- TODO confirm
print(scene_sequences_df.shape)

(696, 70)


In [5]:

# Add one severe_hemorrhagingNN column per engagement position, unless the cached frame already has them
if 'severe_hemorrhaging00' not in scene_sequences_df.columns:
    
    # Drop any columns whose names start with 'severely_injured' before adding the new ones
    severely_injured_columns = [cn for cn in scene_sequences_df.columns if cn.startswith('severely_injured')]
    if severely_injured_columns: scene_sequences_df = scene_sequences_df.drop(columns=severely_injured_columns)
    
    for group_key, idx_df in scene_sequences_df.groupby(fu.scene_groupby_columns):
        
        # Get the whole scene history (group values are zipped with their columns; no eval needed)
        if not isinstance(group_key, tuple): group_key = (group_key,)
        mask_series = True
        for cn, key_value in zip(fu.scene_groupby_columns, group_key): mask_series &= (frvrs_logs_df[cn] == key_value)
        scene_df = frvrs_logs_df[mask_series]
        
        engagement_starts_list = []
        for patient_id, patient_df in scene_df.groupby('patient_id'):
            
            # Check if the responder even interacted with this patient
            mask_series = patient_df.action_type.isin(fu.responder_negotiations_list)
            if not mask_series.any(): continue
            
            # The first engagement start is the smallest action tick among the negotiation rows
            engagement_start = patient_df[mask_series].action_tick.min()
            
            # Add engagement information to the list
            severe_hemorrhaging = fu.get_is_patient_severely_injured(patient_df)
            engagement_starts_list.append((patient_id, engagement_start, severe_hemorrhaging))
        
        # Sort the starts list chronologically
        actual_engagement_order = sorted(engagement_starts_list, key=lambda x: x[1])
        
        # Write each patient's flag into the column for its engagement position
        for i, (_, _, severe_hemorrhaging) in enumerate(actual_engagement_order):
            scene_sequences_df.loc[idx_df.index, f'severe_hemorrhaging{i:02d}'] = severe_hemorrhaging
    
    # Persist the widened frame so the guard above skips this work next time
    nu.store_objects(scene_sequences_df=scene_sequences_df)
    nu.save_data_frames(scene_sequences_df=scene_sequences_df)

In [7]:

# Add one sort_hemorrhagingNN column per engagement position, unless the cached frame already has them
if 'sort_hemorrhaging00' not in scene_sequences_df.columns:
    for group_key, idx_df in scene_sequences_df.groupby(fu.scene_groupby_columns):
        
        # Get the whole scene history (group values are zipped with their columns; no eval needed)
        if not isinstance(group_key, tuple): group_key = (group_key,)
        mask_series = True
        for cn, key_value in zip(fu.scene_groupby_columns, group_key): mask_series &= (frvrs_logs_df[cn] == key_value)
        scene_df = frvrs_logs_df[mask_series]
        
        engagement_starts_list = []
        for patient_id, patient_df in scene_df.groupby('patient_id'):
            
            # Check if the responder even interacted with this patient
            mask_series = patient_df.action_type.isin(fu.responder_negotiations_list)
            if not mask_series.any(): continue
            
            # The first engagement start is the smallest action tick among the negotiation rows
            engagement_start = patient_df[mask_series].action_tick.min()
            
            # Get the patient's latest non-null SORT value, if there is one
            # (the conditional expression must not contain a colon; the original was a SyntaxError)
            mask_series = ~patient_df.patient_sort.isnull()
            patient_sort = (
                patient_df[mask_series].sort_values('action_tick').iloc[-1].patient_sort
                if mask_series.any()
                else None
            )
            sort_hemorrhaging = str(patient_sort) + '-' + str(fu.get_is_patient_severely_injured(patient_df))
            
            # Add engagement information to the list
            engagement_starts_list.append((patient_id, engagement_start, sort_hemorrhaging))
        
        # Sort the starts list chronologically
        actual_engagement_order = sorted(engagement_starts_list, key=lambda x: x[1])
        
        # Write each patient's label into the column for its engagement position
        for i, (_, _, sort_hemorrhaging) in enumerate(actual_engagement_order):
            scene_sequences_df.loc[idx_df.index, f'sort_hemorrhaging{i:02d}'] = sort_hemorrhaging
    
    # Persist the widened frame so the guard above skips this work next time
    nu.store_objects(scene_sequences_df=scene_sequences_df)
    nu.save_data_frames(scene_sequences_df=scene_sequences_df)

Pickling to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/pkl/scene_sequences_df.pkl
Saving to /mnt/c/Users/DaveBabbitt/Documents/GitHub/itm-analysis-reporting/saves/csv/scene_sequences_df.csv


In [None]:

# Peek at eight random scenes, transposed so each scene is a column,
# leaving out the columns that are empty for every sampled scene
(
    scene_sequences_df
    .sample(8)
    .dropna(axis='columns', how='all')
    .T
)

In [None]:

# For each kind of sequence column, walk the engagement positions and, at every step, keep the most
# common value among the scenes that matched the previous step's most common value
for cn in ['patient_sort', 'injury_severity', 'combo']:
    print(cn)
    
    # Start with the most common value at position 00
    value_counts_series = scene_sequences_df[f'{cn}00'].value_counts()
    sequence_item = value_counts_series.index[0]
    sequence_items_list = [sequence_item]
    mask_series_dict = {0: (scene_sequences_df[f'{cn}00'] == sequence_item)}
    
    # NOTE(review): each mask only tests its own position (it is not AND-ed with the earlier ones),
    # so step i is conditioned on position i-1 alone, not on the whole prefix -- confirm that is intended
    for i in range(1, 17):
        value_counts_series = scene_sequences_df[mask_series_dict[i-1]][f'{cn}{i:02d}'].value_counts()
        
        # Stop as soon as no scene has a value at this position
        if not value_counts_series.shape[0]: break
        sequence_item = value_counts_series.index[0]
        sequence_items_list.append(sequence_item)
        mask_series_dict[i] = (scene_sequences_df[f'{cn}{i:02d}'] == sequence_item)
    print(sequence_items_list)
    
    # Show how many scenes match at each position, then the scenes matching at the last one reached
    for key, mask_series in mask_series_dict.items(): print(key, scene_sequences_df[mask_series].shape)
    display(scene_sequences_df[mask_series].dropna(axis='columns', how='all').T)
    
    # Sample the scenes matching at position 9, but only if the walk got that far,
    # and never ask for more rows than there are (sample raises otherwise)
    if 9 in mask_series_dict:
        df = scene_sequences_df[mask_series_dict[9]]
        display(df.sample(min(8, df.shape[0])).dropna(axis='columns', how='all').T)

In [None]:

# What are the worst examples of engagement sequences?
# Here: the scenes in which the value el shows up at the latest engagement position
for cn, el in zip(['patient_sort', 'injury_severity', 'combo'], ['still', 'high', 'still-high']):
    
    # Scan from position 16 down to 00 and stop at the first position holding el
    # (if none does, this ends on position 00 with an all-False mask)
    for i in range(16, -1, -1):
        mask_series = (scene_sequences_df[f'{cn}{i:02d}'] == el)
        if mask_series.any(): break
    print(f'{cn}{i:02d}')
    df = scene_sequences_df[mask_series]
    print(df.shape)
    
    # Print the whole sequence, position 00 included (the original started at 01 and dropped the first engagement)
    print([df[f'{cn}{position:02d}'].squeeze() for position in range(17)])
    display(df.dropna(axis='columns', how='all').T)


### SORT sequence

In [8]:

# Enumerate every 11-long ordering of the SORT categories, one entry per engagement position
sequences = list(product(fu.patient_sort_order, repeat=11))
print(f'{len(sequences):,}')

177147


In [9]:

# Count, for every candidate, the scenes whose first 11 patient_sort columns equal it,
# and remember the first candidate that reaches the highest count
max_count, max_patient_sort_sequence = -1, []
for candidate in tqdm(sequences):
    matches = True
    for position, category in enumerate(candidate):
        matches &= (scene_sequences_df[f'patient_sort{position:02d}'] == category)
    hits = scene_sequences_df[matches].shape[0]
    if hits > max_count:
        max_count, max_patient_sort_sequence = hits, candidate

  0%|          | 0/177147 [00:00<?, ?it/s]

In [10]:

# Quote each category, join them into one phrase, and report the winner with its scene count
quoted_categories = [f'"{category}"' for category in max_patient_sort_sequence]
max_sequence_str = nu.conjunctify_nouns(quoted_categories)
message_str = (
    f'The most popular patient engagement SORT sequence in the 11-patient scenes is {max_sequence_str}'
    f' (used in {max_count} different scenes).'
    ' So the Right Ordering (still patients first, then waver patients, then walker patients) can still be thought of as a good measure.'
)
print(message_str)

The most popular patient engagement SORT sequence in the 11-patient scenes is "still", "still", "still", "waver", "waver", "waver", "waver", "waver", "walker", "walker", and "walker" (used in 55 different scenes). So the Right Ordering (still patients first, then waver patients, then walker patients) can still be thought of as a good measure.



### Injury severity sequence

In [8]:

# Enumerate every 11-long ordering of the injury severity categories, one entry per engagement position
sequences = list(product(fu.injury_severity_order, repeat=11))
print(f'{len(sequences):,}')

177147


In [9]:

# Count, for every candidate, the scenes whose first 11 injury_severity columns equal it,
# and remember the first candidate that reaches the highest count
max_count, max_injury_severity_sequence = -1, []
for candidate in tqdm(sequences):
    matches = True
    for position, category in enumerate(candidate):
        matches &= (scene_sequences_df[f'injury_severity{position:02d}'] == category)
    hits = scene_sequences_df[matches].shape[0]
    if hits > max_count:
        max_count, max_injury_severity_sequence = hits, candidate

  0%|          | 0/177147 [00:00<?, ?it/s]

In [10]:

# Quote each category, join them into one phrase, and report the winner with its scene count
quoted_categories = [f'"{category}"' for category in max_injury_severity_sequence]
max_sequence_str = nu.conjunctify_nouns(quoted_categories)
message_str = (
    f'The most popular patient engagement by injury severity sequence in the 11-patient scenes is {max_sequence_str}'
    f' (used in {max_count} different scenes).'
)
print(message_str)

The most popular patient engagement by injury severity sequence in the 11-patient scenes is "high", "high", "high", "medium", "high", "medium", "medium", "medium", "low", "low", and "low" (used in 26 different scenes).



### SORT-severity sequence

In [12]:

# Build the combo alphabet from the SORT/severity pairs that line up position by position in the two
# most popular sequences found above (not from the full cross product of
# fu.patient_sort_order and fu.injury_severity_order)
combos_alphabet = sorted({
    str(patient_sort) + '-' + str(injury_severity)
    for patient_sort, injury_severity in zip(max_patient_sort_sequence, max_injury_severity_sequence)
})
combos_alphabet

['still-high', 'walker-low', 'waver-high', 'waver-medium']

In [20]:

# Enumerate every 11-long combo sequence laid out as stills, then wavers, then walkers;
# only the wavers have more than one possible label, so only they need a product
sequences = []
for stills_count in range(12):
    for wavers_count in range(12 - stills_count):
        walkers_count = 11 - stills_count - wavers_count
        still_part = ['still-high'] * stills_count
        walker_part = ['walker-low'] * walkers_count
        for waver_tuple in product(['waver-high', 'waver-medium'], repeat=wavers_count):
            sequences.append(still_part + list(waver_tuple) + walker_part)
print(f'{len(sequences):,}')

8,178


In [21]:

# Count, for every candidate, the scenes whose first 11 combo columns equal it,
# and remember the first candidate that reaches the highest count
max_count, max_combo_sequence = -1, []
for candidate in tqdm(sequences):
    matches = True
    for position, category in enumerate(candidate):
        matches &= (scene_sequences_df[f'combo{position:02d}'] == category)
    hits = scene_sequences_df[matches].shape[0]
    if hits > max_count:
        max_count, max_combo_sequence = hits, candidate

  0%|          | 0/8178 [00:00<?, ?it/s]

In [22]:

# Quote each combo, join them into one phrase, and report the winner with its scene count
quoted_categories = [f'"{category}"' for category in max_combo_sequence]
max_sequence_str = nu.conjunctify_nouns(quoted_categories)
message_str = (
    f'The most popular patient engagement by SORT-severity sequence in the 11-patient scenes is {max_sequence_str} (used in {max_count} different scenes).'
)
print(message_str)

The most popular patient engagement by SORT-severity sequence in the 11-patient scenes is "still-high", "still-high", "still-high", "waver-medium", "waver-high", "waver-medium", "waver-medium", "waver-medium", "walker-low", "walker-low", and "walker-low" (used in 26 different scenes).



### Severe-hemorrhaging sequence

In [11]:

# Enumerate every 11-long True/False pattern, one flag per engagement position
sequences = list(product([True, False], repeat=11))
print(f'{len(sequences):,}')

2048


In [12]:

# Count, for every candidate, the scenes whose first 11 severe_hemorrhaging columns equal it,
# and remember the first candidate that reaches the highest count
max_count, max_severe_hemorrhaging_sequence = -1, []
for candidate in tqdm(sequences):
    matches = True
    for position, category in enumerate(candidate):
        matches &= (scene_sequences_df[f'severe_hemorrhaging{position:02d}'] == category)
    hits = scene_sequences_df[matches].shape[0]
    if hits > max_count:
        max_count, max_severe_hemorrhaging_sequence = hits, candidate

  0%|          | 0/2048 [00:00<?, ?it/s]

In [13]:

# Quote each flag, join them into one phrase, and report the winner with its scene count
quoted_categories = [f'"{category}"' for category in max_severe_hemorrhaging_sequence]
max_sequence_str = nu.conjunctify_nouns(quoted_categories)
message_str = (
    'The most popular patient engagement by has-severe-hemorrhaging sequence in the 11-patient scenes is'
    f' {max_sequence_str} (used in {max_count} different scenes).'
)
print(message_str)

The most popular patient engagement by has-severe-hemorrhaging sequence in the 11-patient scenes is "False", "True", "False", "False", "True", "False", "False", "False", "False", "False", and "False" (used in 45 different scenes).



### SORT-severe-hemorrhaging sequence

In [14]:

# Build the sort-hemorrhaging alphabet from the SORT/flag pairs that line up position by position in the
# two most popular sequences found above (not from the full cross product of
# fu.patient_sort_order and [True, False])
sort_hemorrhagings_alphabet = sorted({
    str(patient_sort) + '-' + str(severe_hemorrhaging)
    for patient_sort, severe_hemorrhaging in zip(max_patient_sort_sequence, max_severe_hemorrhaging_sequence)
})
sort_hemorrhagings_alphabet

['still-False', 'still-True', 'walker-False', 'waver-False', 'waver-True']

In [23]:

# Enumerate every 11-long sort-hemorrhaging sequence laid out as stills, then wavers, then walkers;
# stills and wavers each have two possible labels, walkers only one
sequences = []
for stills_count in range(12):
    for wavers_count in range(12 - stills_count):
        walker_part = ['walker-False'] * (11 - stills_count - wavers_count)
        for still_tuple, waver_tuple in product(
            product(['still-False', 'still-True'], repeat=stills_count),
            product(['waver-False', 'waver-True'], repeat=wavers_count),
        ):
            sequences.append(list(still_tuple) + list(waver_tuple) + walker_part)
print(f'{len(sequences):,}') # 45,057

45,057


In [18]:

# Reload the cached frame, then count, for every candidate, the scenes whose first 11
# sort_hemorrhaging columns equal it, remembering the first candidate that reaches the highest count
scene_sequences_df = nu.load_object('scene_sequences_df')
max_count, max_sort_hemorrhaging_sequence = -1, []
for candidate in tqdm(sequences):
    matches = True
    for position, category in enumerate(candidate):
        matches &= (scene_sequences_df[f'sort_hemorrhaging{position:02d}'] == category)
    hits = scene_sequences_df[matches].shape[0]
    if hits > max_count:
        max_count, max_sort_hemorrhaging_sequence = hits, candidate

  0%|          | 0/45057 [00:00<?, ?it/s]

In [19]:

# Quote each label, join them into one phrase, and report the winner with its scene count
quoted_categories = [f'"{category}"' for category in max_sort_hemorrhaging_sequence]
max_sequence_str = nu.conjunctify_nouns(quoted_categories)
message_str = (
    f'The most popular patient engagement by SORT-severe-hemorrhaging sequence in the 11-patient scenes is {max_sequence_str} (used in {max_count} different scenes).'
)
print(message_str)

The most popular patient engagement by SORT-severe-hemorrhaging sequence in the 11-patient scenes is "still-False", "still-True", "still-False", "waver-False", "waver-True", "waver-False", "waver-False", "waver-False", "walker-False", "walker-False", and "walker-False" (used in 24 different scenes).
