In [1]:

%pprint
import os
import os.path as osp
import sys

# Make the sibling "py" folder importable so project modules (e.g. FRVRS) resolve.
# `os` and `osp` are imported here because this cell is the first one to use them;
# without that, a fresh kernel raises NameError before anything else runs.
if osp.join(os.pardir, 'py') not in sys.path:
    sys.path.insert(1, osp.join(os.pardir, 'py'))

Pretty printing has been turned OFF


In [2]:

from FRVRS import (fu, nu, DataFrame, display)
import os


# Post-engagement transactions for Affinity Analysis and Association Rule Learning

In [3]:

# Load the combined logs data frame from its pickle, when one has been stored,
# and report its dimensions as (rows, columns)
if nu.pickle_exists('frvrs_logs_df'):
    frvrs_logs_df = nu.load_object('frvrs_logs_df')
    print(frvrs_logs_df.shape)

(829116, 114)



## Decision Points

<p>
    OSU and Big Bear: With the IRB approval (for the previous OSU dataset) in place, I would like to get working on the dataset asap. That means:
    <ul>
        <li>Getting it deidentified and sharable</li>
        <li>If there is general demographic information like gender or level of expertise that can be associated with the VR performance, that would be useful.</li>
        <li>Big Bear: once it is available to you, I want an exploratory analysis on decision points such that I can see the variance in responses to the same situation.</li>
        <li>For example, if there are 3 waving patients and the participant chooses to move toward one and assess them, I want to know the sim state (visual info. on each patient, distance, etc.) and then a breakdown of how many participants chose to move toward each patient.</li>
        <li>Similar for treatment options, for each patient present in the simulated environment, what is the variance in which treatment is applied</li>
         <li>Does it depend on order?</li>
          <li>Timing?</li>
           <li>Be creative and ask all the questions. The goal is to provide TA1 with this dataset if there is anything useful to be found in analyzing the decisions (not necessarily that we find results but that the data is in shape to ask these questions). Let me know if you need additional clarification.</li>
    </ul>
</p>


### Patient Engagement


These action types mean that the DM has made a decision: INJURY_TREATED, PATIENT_ENGAGED, PULSE_TAKEN, TAG_APPLIED, and TOOL_APPLIED.

In [4]:

# Preview four random log rows: drop the columns that are empty in all four,
# then transpose so each sampled row reads as a column
df = (
    frvrs_logs_df
    .sample(4)
    .dropna(axis='columns', how='all')
    .T
)
display(df.head(5))

Unnamed: 0,148879,450086,55389,271973
action_type,TOOL_HOVER,S_A_L_T_WALKED,TOOL_HOVER,S_A_L_T_WALK_IF_CAN
action_tick,245034,686716,318344,310231
event_time,2022-12-07 12:45:50,2022-03-15 09:40:16,2023-03-15 10:48:40,2022-03-15 10:13:03
session_uuid,331f875e-eba1-4033-a502-6a888aee4e9c,090d0988-3f81-4603-87e2-477538a6750c,71197277-ba36-4a82-9ae0-0016e7756665,aec5d448-c4e6-4af7-8e36-d258c7bb6f96
file_name,All CSV files renamed by date/12.07.22.1249.csv,Disaster Day 2022/MT_0950.csv,All CSV files renamed by date/03.15.23.1058.csv,Disaster Day 2022/JS_1016.csv


In [None]:

# Get the actions in the same or previous minute to the end of the session
actions_list = []
for (session_uuid, scene_id), scene_df in fu.get_session_groupby(frvrs_logs_df, mask_series=None, extra_column='scene_id'):
    
    # Truncate every event time to its minute once per scene instead of once per SESSION_END row
    minutes_series = scene_df.event_time.map(lambda x: x.replace(second=0, microsecond=0))
    all_minutes_list = sorted(minutes_series.drop_duplicates().tolist())
    
    # One entry per SESSION_END row, so a minute with two SESSION_END rows is processed twice
    end_mask_series = scene_df.action_type.isin(['SESSION_END'])
    end_minutes_list = sorted(minutes_series[end_mask_series].tolist())
    
    for end_minute in end_minutes_list:
        minutes_list = [end_minute]
        
        # Only add the preceding minute when there is one; index 0 - 1 would otherwise
        # wrap around to the LAST minute of the scene and pull in unrelated actions
        end_index = all_minutes_list.index(end_minute)
        if end_index > 0: minutes_list.append(all_minutes_list[end_index - 1])
        
        # A transaction needs at least two actions to say anything about co-occurrence
        mask_series = minutes_series.isin(minutes_list)
        action_types_list = []
        if scene_df[mask_series].shape[0] > 1: action_types_list = scene_df[mask_series].action_type.tolist()
        if action_types_list: actions_list.append(action_types_list)
len(actions_list)

In [None]:

# Get the actions in the same minute as the end of the session
actions_list = []
for (session_uuid, scene_id), scene_df in fu.get_session_groupby(frvrs_logs_df, mask_series=None, extra_column='scene_id'):
    
    # Truncate every event time to its minute once per scene instead of once per SESSION_END row
    minutes_series = scene_df.event_time.map(lambda x: x.replace(second=0, microsecond=0))
    
    # One entry per SESSION_END row, so a minute with two SESSION_END rows is processed twice
    end_mask_series = scene_df.action_type.isin(['SESSION_END'])
    end_minutes_list = sorted(minutes_series[end_mask_series].tolist())
    
    for end_minute in end_minutes_list:
        mask_series = minutes_series.isin([end_minute])
        
        # A transaction needs at least two actions to say anything about co-occurrence
        action_types_list = []
        if scene_df[mask_series].shape[0] > 1: action_types_list = scene_df[mask_series].action_type.tolist()
        if action_types_list: actions_list.append(action_types_list)
len(actions_list)

In [None]:

from mlxtend.preprocessing import TransactionEncoder

# Encode the list of transactions as a table with one row per transaction
# and one column per distinct action type found by the encoder
te = TransactionEncoder()
te.fit(actions_list)
te_ary = te.transform(actions_list)
actions_one_hot_df = DataFrame(te_ary, columns=te.columns_)
print(actions_one_hot_df.shape)
print(actions_one_hot_df.columns.tolist())

# Show up to four random action types (rows after the transpose) for up to four random transactions
df = actions_one_hot_df.sample(
    min(4, actions_one_hot_df.shape[0])
).dropna(axis='columns', how='all').T
df.sample(min(4, df.shape[0]))

In [None]:

from mlxtend.frequent_patterns import apriori

# Mine the itemsets of at most three action types that reach the 0.01 minimum support
actions_itemsets_df = apriori(actions_one_hot_df, min_support=0.01, use_colnames=True, max_len=3)
print(actions_itemsets_df.shape)

# Show up to four random itemsets, transposed
(
    actions_itemsets_df
    .sample(min(4, actions_itemsets_df.shape[0]))
    .dropna(axis='columns', how='all')
    .T
)

In [None]:

# Derive the association rules from the frequent itemsets, unless a pickled copy already exists.
# NOTE(review): the pickle is reused whenever it exists, whichever actions_list variant produced
# the current itemsets -- confirm the cached rules match the transactions being analyzed.
if not nu.pickle_exists('actions_rules_df'):
    from mlxtend.frequent_patterns import association_rules
    actions_rules_df = association_rules(actions_itemsets_df, metric='lift', min_threshold=0.75)
    nu.store_objects(actions_rules_df=actions_rules_df)
else:
    actions_rules_df = nu.load_object('actions_rules_df')
print(actions_rules_df.shape)

# Show up to four random rules, transposed
display(
    actions_rules_df
    .sample(min(4, actions_rules_df.shape[0]))
    .dropna(axis='columns', how='all')
    .T
)

In [None]:

# Rank the rules whose only consequent is SESSION_END by lift, then by antecedent support
mask_series = (actions_rules_df.consequents == frozenset({'SESSION_END'}))
df = actions_rules_df[mask_series].sort_values(['lift', 'antecedent support'], ascending=[False, False])
if df.head(5).shape[0]:
    print('Get the antecedent with the highest support for session end')
    
    # Join each antecedent's action types directly; stripping characters from the tuple repr
    # only worked for one-item antecedents and garbled the multi-item ones
    antecedents_list = [' + '.join(sorted(map(str, x))) for x in df.head(5).antecedents.tolist()]
    print('The antecedents with highest lift and support to SESSION_END,', end='')
    print(f' and probably therefore to PATIENT_ENGAGEMENT_END, are {nu.conjunctify_nouns(antecedents_list)}.')
    display(df.head(5))

In [None]:

# Rank the rules whose only antecedent is VOICE_CAPTURE by lift, then by antecedent support
mask_series = (actions_rules_df.antecedents == frozenset({'VOICE_CAPTURE'}))
df = actions_rules_df[mask_series].sort_values(['lift', 'antecedent support'], ascending=[False, False])
if df.head(5).shape[0]:
    print('Get the consequent with the highest support for voice capture')
    
    # Show the action type names themselves rather than the tuple repr (e.g. "('X',)")
    consequents_list = [' + '.join(sorted(map(str, x))) for x in df.head(5).consequents.tolist()]
    print('The consequents with highest lift and support to VOICE_CAPTURE,', end='')
    print(f' are {nu.conjunctify_nouns(consequents_list)}.')
    display(df.head(5).T)


### Affinity Analysis and Association Rule Learning using the Apriori Algorithm

In [None]:

# Record as transactions a more complex example of a patient engagement:
# one transaction per (session_uuid, scene_id) group, holding that group's action types
if nu.pickle_exists('all_transactions'):
    all_transactions = nu.load_object('all_transactions')
else:
    
    # Use each group's own frame instead of re-masking the full logs frame once per group.
    # NOTE(review): assumes fu.get_session_groupby yields exactly the rows that match each
    # (session_uuid, scene_id) key, in their original order -- confirm against the helper.
    all_transactions = [
        scene_df.action_type.tolist()
        for (session_uuid, scene_id), scene_df in fu.get_session_groupby(
            frvrs_logs_df, mask_series=None, extra_column='scene_id'
        )
    ]
    nu.store_objects(all_transactions=all_transactions)

In [None]:

def _format_itemset(itemset):
    """Render one side of a rule (a frozenset of action types) as text, e.g. 'A + B'."""
    return ' + '.join(sorted(map(str, itemset)))


def _get_top_rules(actions_rules_df, column_name, action_type, top_n=5):
    """Return the top rules whose given side is exactly {action_type}, by lift then antecedent support."""
    mask_series = (actions_rules_df[column_name] == frozenset({action_type}))
    df = actions_rules_df[mask_series].sort_values(['lift', 'antecedent support'], ascending=[False, False])
    
    return df.head(top_n)


def _print_action_examples(actions_list):
    """Print a few randomly chosen transactions and the longest transaction in the list."""
    import random
    list_length = len(actions_list)
    print(
        f'We have {list_length:,} simultaneous actions in our list. Here are some examples:',
        flush=True
    )
    
    # Print each transaction with probability 10/list_length, stopping after five examples
    counter = 0
    for action_list in actions_list:
        if (counter < 5) and (random.random() <= 10/list_length):
            print(action_list)
            counter += 1
    
    # The first transaction of maximal length wins ties
    max_list = max(actions_list, key=len, default=[])
    max_length = len(max_list)
    print(
        f'\nIn one sim log, these {max_length} actions occur simultaneously: '
        f'{nu.conjunctify_nouns(max_list)}.'
    )


def _get_actions_list(actions_type, verbose=False):
    """Load, or build and pickle, the list of action-type transactions."""
    pickle_name = f'{actions_type}_actions_list'
    if nu.pickle_exists(pickle_name):
        return nu.load_object(pickle_name)
    if verbose: print('Get all CSVs out of one data frame', flush=True)
    
    # Fail with a clear message; the name is local to this function, so a missing
    # pickle would otherwise surface as an UnboundLocalError on the next line
    if not nu.pickle_exists('frvrs_logs_df'):
        raise FileNotFoundError("The 'frvrs_logs_df' pickle is needed to build the actions list, but it does not exist")
    frvrs_logs_df = nu.load_object('frvrs_logs_df')
    print(f'frvrs_logs_df.shape: {frvrs_logs_df.shape}', flush=True)
    
    # One transaction per (session_uuid, action_tick) group that logged more than one row
    actions_list = [
        df.action_type.tolist()
        for _, df in fu.get_session_groupby(frvrs_logs_df, mask_series=None, extra_column='action_tick')
        if df.shape[0] > 1
    ]
    nu.store_objects(**{pickle_name: actions_list})
    
    return actions_list


def _get_actions_one_hot_df(actions_type, verbose=False):
    """Load, or build and pickle, the one-hot encoding of the transactions."""
    pickle_name = f'{actions_type}_actions_one_hot_df'
    if nu.pickle_exists(pickle_name):
        return nu.load_object(pickle_name)
    from mlxtend.preprocessing import TransactionEncoder
    if verbose: print('Record as actions a more complex example of a patient engagement', flush=True)
    actions_list = _get_actions_list(actions_type, verbose=verbose)
    if verbose: _print_action_examples(actions_list)
    te = TransactionEncoder()
    te_ary = te.fit(actions_list).transform(actions_list)
    actions_one_hot_df = DataFrame(te_ary, columns=te.columns_)
    nu.store_objects(**{pickle_name: actions_one_hot_df})
    
    return actions_one_hot_df


def _get_actions_itemsets_df(actions_type, verbose=False):
    """Load, or mine and pickle, the frequent itemsets (with an added itemsets_size column)."""
    pickle_name = f'{actions_type}_actions_itemsets_df'
    if nu.pickle_exists(pickle_name):
        return nu.load_object(pickle_name)
    if verbose: print('Convert the list of lists to one hot', flush=True)
    actions_one_hot_df = _get_actions_one_hot_df(actions_type, verbose=verbose)
    
    # Reported once here, whether the one-hot frame was loaded or freshly built
    print(f'{actions_type}_actions_one_hot_df.shape: {actions_one_hot_df.shape}', flush=True)
    from mlxtend.frequent_patterns import apriori
    actions_itemsets_df = apriori(
        actions_one_hot_df, min_support=0.01, use_colnames=True, max_len=50
    )
    
    # The itemsets are already collections, so their size needs no eval() round trip
    actions_itemsets_df['itemsets_size'] = actions_itemsets_df.itemsets.map(len)
    nu.store_objects(**{pickle_name: actions_itemsets_df})
    
    return actions_itemsets_df


def _get_actions_rules_df(actions_type, verbose=False):
    """Load, or infer and pickle, the association rules."""
    pickle_name = f'{actions_type}_actions_rules_df'
    if nu.pickle_exists(pickle_name):
        return nu.load_object(pickle_name)
    if verbose: print('Build the model', flush=True)
    actions_itemsets_df = _get_actions_itemsets_df(actions_type, verbose=verbose)
    print(f'{actions_type}_actions_itemsets_df.shape: {actions_itemsets_df.shape}', flush=True)
    display(actions_itemsets_df.sort_values('itemsets_size', ascending=False).head(5))
    from mlxtend.frequent_patterns import association_rules
    actions_rules_df = association_rules(actions_itemsets_df, metric='lift', min_threshold=0.75)
    nu.store_objects(**{pickle_name: actions_rules_df})
    
    return actions_rules_df


def _display_rule_metrics(actions_rules_df):
    """Display, per metric, the rules attaining its maximum, then all rules sorted by every metric."""
    display(actions_rules_df.head().T)
    metric_dict = {}
    for metric in [
        'antecedent support', 'consequent support', 'support', 'confidence', 'lift', 'leverage', 'conviction',
        'zhangs_metric'
    ]:
        
        # Skip a metric column that this rules frame does not have rather than raising a KeyError
        # (presumably the column set depends on the mlxtend version that produced the rules)
        if metric not in actions_rules_df.columns: continue
        max_metric = actions_rules_df[metric].max()
        print(metric)
        metric_dict[metric] = max_metric
        mask_series = (actions_rules_df[metric] == max_metric)
        display(actions_rules_df[mask_series].head())
    
    # Sort by all metrics, taking the metrics in descending order of their maxima
    metrics_list = sorted(metric_dict.items(), key=lambda x: x[1], reverse=True)
    if metrics_list:
        display(actions_rules_df.sort_values([x[0] for x in metrics_list], ascending=False))


def do_affinity_analysis(actions_type, verbose=False):
    """
    Run (or reload) an Apriori affinity analysis and display selected association rules.
    
    Every intermediate artifact (actions list, one-hot frame, itemsets, rules) is
    pickled under a name prefixed with actions_type and reloaded when present, so
    a later call with the same actions_type reuses whatever was stored.
    
    NOTE(review): actions_type only selects the cache names. The transactions are
    always built from log rows sharing a (session_uuid, action_tick) pair, so two
    different actions_type values yield the same analysis unless their pickles
    differ -- confirm this is intended.
    
    Parameters:
        actions_type (str): Prefix for the pickle names and for the printed labels.
        verbose (bool, optional): Whether to print progress messages, example
            transactions and per-metric rule summaries. Defaults to False.
    
    Returns:
        None: Results are printed and displayed.
    
    Raises:
        FileNotFoundError: If the actions list has to be built and the
            'frvrs_logs_df' pickle does not exist.
    """
    if verbose:
        print(f'{actions_type.title()} Actions for Affinity Analysis and Association Rule Learning', flush=True)
        print('Collect the inferred rules in a data frame', flush=True)
    actions_rules_df = _get_actions_rules_df(actions_type, verbose=verbose)
    print(f'{actions_type}_actions_rules_df.shape: {actions_rules_df.shape}', flush=True)
    if verbose: _display_rule_metrics(actions_rules_df)
    
    df = _get_top_rules(actions_rules_df, 'antecedents', 'PATIENT_ENGAGED')
    if df.shape[0]:
        print('Get the consequent with the highest support for patient engaged')
        display(df)
    
    df = _get_top_rules(actions_rules_df, 'consequents', 'PATIENT_ENGAGED')
    if df.shape[0]:
        print('Get the antecedent with the highest support for patient engaged')
        display(df)
    
    df = _get_top_rules(actions_rules_df, 'consequents', 'SESSION_END')
    if df.shape[0]:
        print('Get the antecedent with the highest support for session end')
        
        # Name the action types themselves rather than printing tuple reprs such as "('X',)"
        antecedents_list = [_format_itemset(x) for x in df.antecedents.tolist()]
        print('The antecedents with highest lift and support to SESSION_END,', end='')
        print(f' and probably therefore to PATIENT_ENGAGEMENT_END, are {nu.conjunctify_nouns(antecedents_list)}.')
        display(df)
    
    df = _get_top_rules(actions_rules_df, 'antecedents', 'SESSION_START')
    if df.shape[0]:
        print('Get the consequent with the highest support for session start')
        display(df)

In [None]:

# Run the affinity analysis with its pickled artifacts stored under the 'next_' prefix.
# NOTE(review): do_affinity_analysis builds its transactions from rows sharing a
# (session_uuid, action_tick) pair whatever actions_type is -- confirm 'next' is meant to differ.
do_affinity_analysis(actions_type='next', verbose=False)


## Do Affinity analysis on the actions that are logged at the exact same time

In [None]:

# Run the affinity analysis with its pickled artifacts stored under the 'simultaneous_' prefix
do_affinity_analysis(actions_type='simultaneous', verbose=False)