In [1]:
import numpy as np
import pandas as pd
import scipy.stats

pd.options.mode.chained_assignment = None
from matplotlib import pyplot as plt, rcParams
# import cv2
import seaborn as sns

sns.set(style="white", context="paper")
from cycler import cycler
import os, sys
import glob
from datetime import datetime, timedelta
from itertools import combinations
import base64
from PIL import Image
from io import BytesIO as _BytesIO
import requests
import json
import pickle
from datetime import datetime
from IPython.display import display, Markdown, Latex
from sklearn.metrics import *
import collections
from copy import deepcopy

# import plotly
# from pandas_profiling import ProfileReport

pd.options.display.max_columns = None


def printm(s):
    """Render the string ``s`` as Markdown in the cell output and return what ``display`` returns."""
    rendered = Markdown(s)
    return display(rendered)


## Configs for data fetch and plotting

In [2]:

# File-name prefix used to glob the per-experiment result summary JSON files.
result_summary_prefix = "results_summary"

# File-name prefix used to glob the per-experiment timestamped prediction pickles.
ts_prediction_file_prefix = "timestamped_predictions"

# Number of cluster centers.
NUM_CLUSTERS = 30

# Ground-truth annotation CSV (path is relative to the notebook's working directory).
gt_file = "../../GT_marking/gt_realworld.csv"

# Date string for fetching specific results.
fetch_time_str = "20210810"


In [3]:
# Styling keyword arguments, keyed by graph type.
gconfigs = dict(
    barplot=dict(color='blue', linestyle='-.', marker='.', alpha=0.5),
)


#Percentile calculations
def perc_75(x):
    """Return the 75th percentile of ``x`` (numpy's default interpolation)."""
    upper_quartile = np.percentile(x, 75)
    return upper_quartile


def perc_25(x):
    """Return the 25th percentile of ``x`` (numpy's default interpolation)."""
    lower_quartile = np.percentile(x, 25)
    return lower_quartile


# Default matplotlib rc parameters applied to every figure drawn after this cell.
notebook_default_rcparams = {
    "axes.titlesize": 32,
    "axes.labelsize": 32,
    "legend.fontsize": 32,
    "legend.title_fontsize": 32,
    "xtick.labelsize": 32,
    "ytick.labelsize": 32,
    "axes.grid": True,
    "legend.framealpha": 0.5,
    "lines.linewidth": 5,
    "legend.loc": 'upper left'

}
rcParams.update(notebook_default_rcparams)

# Small numeric constant (not referenced by the cells visible in this part of the notebook).
EPSILON = 2e-2

# Date-stamped output directories for plots and result files. The stamp is taken
# once so both directories share the same date even if the cell runs across midnight.
_run_date = datetime.now().strftime("%Y%m%d")

plotting_dir = f'plots/{_run_date}'
# exist_ok=True makes re-running the cell safe without a separate existence check.
os.makedirs(plotting_dir, exist_ok=True)

out_result_dir = f'results/{_run_date}'
os.makedirs(out_result_dir, exist_ok=True)


# Collect ground truth from the GT file for the real-world dataset


In [4]:
# Load the ground-truth annotations from the CSV configured in `gt_file`.
df_gt = pd.read_csv(gt_file)
# Print column names, dtypes and non-null counts as a quick schema check.
df_gt.info()
# Last expression of the cell: shows the first rows as the cell output.
df_gt.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 462 entries, 0 to 461
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          462 non-null    object
 1   start_time  462 non-null    int64 
 2   end_time    462 non-null    int64 
 3   context     462 non-null    object
dtypes: int64(2), object(2)
memory usage: 14.6+ KB


Unnamed: 0,id,start_time,end_time,context
0,p4_3,1662481192,1662481208,Exercising
1,p4_3,1662481233,1662481253,OfficeWork
2,p4_3,1662481266,1662481279,Exercising
3,p4_3,1662481304,1662481318,Exercising
4,p4_3,1662481327,1662481340,Exercising


# Combined evaluation functions: given experiment results, timestamped predictions and ground truth

In [5]:
# Ontology used for labeling contexts.
# The CSV has no header row: first column is the ontology key, second column
# is a ';'-separated list of context labels for that key.
df_onto = pd.read_csv(
    '../../ontological_models/ontology_labels_aug11.csv',
    names=['activities', 'contexts'],
)
df_onto['contexts'] = df_onto['contexts'].apply(lambda labels: labels.split(";"))

# Lookup table: ontology key -> list of context labels.
onto_dict = df_onto.set_index('activities')['contexts'].to_dict()


# Per-dataset translation from a dataset's raw activity names to the activity
# names used by the ontology. Keyed first by dataset name, then by raw activity.
activity_rename_mapping = {

    'realworld':{
        "coffeemachine": "Cooking",
        "doorknock": "StepIn",
        "dooropen": "StepIn",
        "eating": "Eating",
        "jumping": "Hiking",
        "mouse": "TypingOffice",
        "phonering": "OnPhone",
        "running": "Hiking",
        "sweeping": "VacuumHome",
        "talking": "Talking",
        "tv": "WatchingTv",
        "typing": "TypingOffice",
        "vacuum": "VacuumHome",
        "writing": "SittingOffice",
    },
    'extrasensory': {
        'lying': 'LyingDown',
        'sitting': 'Sitting',
        'walking': 'Walking',
        'running': 'Running',
        'cycling': 'Cycling',
        'sleeping': 'Sleeping',
        'meeting': 'Meeting',
        'driving': 'Driving',
        'exercising': 'Hiking',
        'cooking': 'Cooking',
        'shopping': 'Shopping',
        'drinking': 'Drinking',
        'shower': 'Shower',
        'cleaning': 'VacuumHome',
        'laundry': 'VacuumHome',
        'clean_dishes': 'VacuumHome',
        'watching_tv': 'WatchingTv',
        'surfing_internet': 'ReadingOffice',
        'singing': 'Dancing',
        'talking': 'Talking',
        'office_work': 'TypingOffice',
        'eating': 'Eating',
        'toilet': 'Toilet',
        'grooming': 'Grooming',
        'dressing_up': 'Grooming',
        'stairs': 'ClimbingStairs',
        'standing': 'Standing',
        'meeting_coworkers': 'Meeting',
        'meeting_friends': 'Dancing',

    },
    'casas': {
        'step_out': 'StepOut',
        'none': 'None',
        'toilet': 'Toilet',
        'onphone': 'OnPhone',
        'grooming': 'Grooming',
        'step_in': 'StepIn',
        'lying': 'LyingDown',
        'drinking': 'Drinking',
        'watching_tv': 'WatchingTv',
        'dressing_up': 'Grooming',
        'taking_meds': 'Eating',
        'wakingup': 'Sleeping',
        'reading': 'TypingOffice',
        'cooking': 'Cooking',
        'eating': 'Eating',
        'shower': 'Shower',
        'sleeping': 'Sleeping',
        'office_work': 'SittingOffice',
        'dishes_home': 'VacuumHome',
        'meeting_friends': 'Dancing',
        'exercising': 'Running',
        'laundry_home': 'VacuumHome'
    },
    'tsu': {
        "boil_water": "Cooking",
        "clean_with_water": "VacuumHome",
        "cut_cook": "Cooking",
        "cut_bread": "Cooking",
        "drink_cold": "Drinking",
        "drink_hot": "Drinking",
        "dry_up": "VacuumHome",
        "dump_in_trash": "VacuumHome",
        "eat_food": "Eating",
        "eat_snack": "Eating",
        "enter": "StepIn",
        "get_up": "Sleeping",
        "get_water": "Eating",
        "insert_tea_bag": "Drinking",
        "lay_down": "LyingDown",
        "leave": "StepOut",
        "pour_grains": "Drinking",
        "pour_water": "Drinking",
        "pour_cold": "Drinking",
        "pour_hot": "Drinking",
        "put_in_sink": "VacuumHome",
        "put_on_table": "VacuumHome",
        "read": "ReadingOffice",
        "sit_down": "Sitting",
        "spread_jam_or_butter": "Eating",
        "stir_cook": "Cooking",
        "stir_drink": "Drinking",
        "take_ham": "Cooking",
        "take_meds": "Eating",
        "take_off_table": "Eating",
        "use_furniture": "VacuumHome",
        "use_glasses": "Eating",
        "use_pc": "TypingOffice",
        "use_kitchen_utility": "VacuumHome",
        # Spelled "OnPhone" like every other dataset's phone activity (was "onPhone").
        "use_telephone": "OnPhone",
        "walk": "Walking",
        "watch_tv": "WatchingTv",
        "clean_table": "VacuumHome",
        "write": "ReadingOffice"
    }
}

# Translation from an ontology context label to the list of tsu-dataset labels
# it is reported as. Contexts without an entry here have no tsu equivalent.
tsu_context_mapping = dict(
    ComingIn=["tsuBreakfast"],
    Commuting=["tsuBreakfast", "tsuCook"],
    GoingOut=["tsuBreakfast"],
    HavingMeal=["tsuMakecoffee", "tsuMaketea"],
    HouseWork=["tsuCleandishes"],
    PreparingMeal=["tsuCook"],
    Relaxing=["tsuBreakfast"],
)


def label_context_v1(cluster_representation, activity_renaming, conf_activities=('sitting', 'standing', 'talking')):
    """Translate a cluster's activity representation into a list of context labels.

    The representation is a string such as ``"a+b>c"``: ``>`` separates
    consecutive steps and ``+`` separates the activities inside one step.
    Contexts are looked up in the module-level ``onto_dict`` with this priority:

    1. sequential pairs (key ``"first+second"``) across consecutive steps,
    2. parallel pairs (key ``"act1_act2"``, either order) within a step,
    3. single-activity steps,
    4. otherwise every activity on its own.

    NOTE: besides its arguments this function reads the module-level names
    ``onto_dict``, ``tsu_context_mapping`` and ``dataset``; they must be
    defined before the first call.

    :param cluster_representation: activity string in the ``"a+b>c"`` format
    :param activity_renaming: dict from raw activity name to ontology activity
        name; the mapped name is lower-cased before the ontology lookup
    :param conf_activities: raw activity names that are dropped whenever the
        representation contains at least one activity outside this collection
    :return: sorted list of unique context labels (possibly empty); when the
        global ``dataset`` is ``'tsu'`` the contexts are translated through
        ``tsu_context_mapping`` and ``['Unknown']`` is returned if none maps
    :raises KeyError: if an activity is missing from ``activity_renaming`` or a
        single renamed activity is missing from ``onto_dict``
    """
    # Parse "a+b>c" into [['a', 'b'], ['c']].
    act_train = [step.split("+") for step in cluster_representation.split(">")]

    # Activities that remain once the confounding ones are ignored.
    unique_activities = {activity for act_set in act_train for activity in act_set}
    unique_activities.difference_update(conf_activities)

    # Only strip the confounding activities when something else is left to label with.
    if len(unique_activities) > 0:
        act_train = [[activity for activity in act_set if activity not in conf_activities]
                     for act_set in act_train]

    # Rename raw activities to lower-cased ontology names; 'unknown' becomes 'none'.
    for i, act_set in enumerate(act_train):
        renamed = ['none' if activity == 'unknown' else activity_renaming[activity].lower()
                   for activity in act_set]
        act_train[i] = sorted(set(renamed))

    # Sequential contexts: every ordered pair of differing activities across consecutive steps.
    seq_contexts = []
    for set1, set2 in zip(act_train, act_train[1:]):
        for first_act in set1:
            for sec_act in set2:
                if first_act == sec_act:
                    continue
                # A pair the ontology does not know simply contributes nothing.
                seq_ctx = onto_dict.get(f'{first_act}+{sec_act}')
                if seq_ctx is not None and seq_ctx[0] != 'Unknown':
                    seq_contexts += seq_ctx

    # Parallel contexts (pairs inside one step) and single-activity contexts.
    single_act_contexts = []
    par_contexts = []
    for act_set in act_train:
        if len(act_set) == 1:
            single_ctx = onto_dict[f"{act_set[0]}"]
            if not (single_ctx[0] == 'Unknown'):
                single_act_contexts += single_ctx
        else:
            for act1, act2 in combinations(act_set, 2):
                if act1 == act2:
                    continue
                # The ontology stores a pair in one order only; try both and treat a
                # pair missing in both orders as 'Unknown' instead of raising KeyError.
                par_ctx = onto_dict.get(f"{act1}_{act2}")
                if par_ctx is None:
                    par_ctx = onto_dict.get(f"{act2}_{act1}", ['Unknown'])
                if not (par_ctx[0] == 'Unknown'):
                    par_contexts += par_ctx

    if len(seq_contexts) > 0:
        final_context_set = sorted(set(seq_contexts))
    elif len(par_contexts) > 0:
        final_context_set = sorted(set(par_contexts))
    elif len(single_act_contexts) > 0:
        final_context_set = sorted(set(single_act_contexts))
    else:
        # Nothing matched so far: fall back to each activity's own contexts.
        all_contexts = []
        for act_set in act_train:
            for activity in act_set:
                single_ctx = onto_dict[f"{activity}"]
                if not (single_ctx[0] == 'Unknown'):
                    all_contexts += single_ctx
        final_context_set = sorted(set(all_contexts))

    # tsu specific conversion (``dataset`` is read from module scope).
    if dataset == 'tsu':
        tsu_final_contexts = []
        for context in final_context_set:
            tsu_final_contexts += tsu_context_mapping.get(context, [])
        if len(tsu_final_contexts) > 0:
            return sorted(set(tsu_final_contexts))
        return ['Unknown']
    return final_context_set



In [6]:
# helper functions for main accuracy function

def get_ctx_vec(ctx_str, contexts):
    """Encode a comma-separated context string as a 0/1 vector over ``contexts``.

    Empty items and the label ``'Unknown'`` are skipped. Any other label must be
    present in ``contexts``; otherwise ``list.index`` raises ``ValueError``.
    """
    indicator = np.zeros(len(contexts))
    skipped = ['', 'Unknown']
    labels = [label for label in ctx_str.split(",") if label not in skipped]
    for label in labels:
        position = contexts.index(label)
        indicator[position] = 1
    return indicator

def get_cluster_labels(cluster_centers, dataset, labeling_func=label_context_v1):
    """Label every cluster center with ``labeling_func``.

    Each center string is reduced to the text between its last ``(`` and the
    first ``)__`` before being handed to ``labeling_func`` together with the
    renaming table of ``dataset`` from ``activity_rename_mapping``.
    """
    representations = []
    for center in cluster_centers:
        # Keep what precedes the first ")__", then what follows the last "(".
        prefix = center.partition(")__")[0]
        representations.append(prefix.rpartition("(")[2])
    return [labeling_func(representation, activity_rename_mapping[dataset])
            for representation in representations]

def compile_ts_results_v2(ts_results, df_onto_pred, df_gt, cluster_labels):
    """Build, per id, a per-timestamp table joining ground truth, cluster predictions and ontology predictions.

    :param ts_results: dict mapping an id to a DataFrame of predicted segments; the
        columns ``start_timestamp``, ``end_timestamp`` and ``cluster_id`` are read.
        NOTE: an ``id`` column is written into each of these frames (caller-visible).
    :param df_onto_pred: DataFrame with ``id``, ``timestamp`` and ``tao_prediction``
        columns; ``tao_prediction`` is concatenated with ``+`` to the predicted labels,
        so it is presumably a list of context strings (confirm against the caller).
    :param df_gt: DataFrame with ``id``, ``start_time``, ``end_time`` and ``context`` columns.
    :param cluster_labels: sequence indexed by ``cluster_id``; entries are concatenated
        with ``+`` and passed to ``np.unique`` (presumably lists of context strings).
    :return: dict mapping each processed id to a DataFrame holding ``id``, ``timestamp``,
        ``gt_context``, ``pred_context``, the remaining ``df_onto_pred`` columns,
        ``combined_context`` and ``onto_context``; the four context columns are
        comma-separated strings. Ids lacking ground truth, overlapping predictions
        or ontology predictions are skipped.
    """
    # Output: one merged per-timestamp frame per id.
    compiled_ts_results_dict = dict()
    # compiled_instance_results_dict = dict()
    for key in ts_results.keys():
        df_pred_id = ts_results[key]
        # Writes into the frame stored in ts_results (same object), not a copy.
        df_pred_id['id'] = key
        df_gt_id = df_gt[df_gt.id == key]
        df_onto_pred_id = df_onto_pred[df_onto_pred.id==key]
        # Keep only predicted segments that overlap the ground-truth time span of this id.
        df_pred_id = df_pred_id[df_pred_id.end_timestamp >= df_gt_id.start_time.min()]
        df_pred_id = df_pred_id[df_pred_id.start_timestamp <= df_gt_id.end_time.max()]
        # Process the id only when all three sources have rows for it.
        if (df_gt_id.shape[0] > 0) & (df_pred_id.shape[0] > 0) & (df_onto_pred_id.shape[0] > 0):
            # if (df_gt_id.shape[0] > 0) & (df_pred_id.shape[0] > 0):
            print(f"Starting on {key}")
            # Expand ground truth to one row per integer timestamp between its min and max (inclusive).
            gt_min_ts, gt_max_ts = df_gt_id.start_time.min(), df_gt_id.end_time.max()
            df_gt_ts = pd.DataFrame(np.arange(gt_min_ts, gt_max_ts + 1), columns=['timestamp'])
            df_gt_ts['gt_context'] = ''
            df_gt_ts = df_gt_ts.set_index('timestamp')
            # .loc label slicing includes both ends; overlapping GT intervals are comma-joined.
            for row_idx, row in df_gt_id.iterrows():
                df_gt_ts.loc[row['start_time']:row['end_time'], 'gt_context'] = df_gt_ts.loc[
                                                                                row['start_time']:row['end_time'],
                                                                                'gt_context'].apply(
                    lambda x: row['context'] if (x == '') else (x + ',' + row['context']))

            # Expand predictions the same way; each timestamp accumulates the labels
            # of every predicted segment covering it (None where no segment covers it).
            pred_min_ts, pred_max_ts = df_pred_id.start_timestamp.min(), df_pred_id.end_timestamp.max()
            df_pred_ts = pd.DataFrame(np.arange(pred_min_ts, pred_max_ts + 1), columns=['timestamp'])
            df_pred_ts['pred_context'] = None
            df_pred_ts = df_pred_ts.set_index('timestamp')
            for row_idx, row in df_pred_id.iterrows():
                df_pred_ts.loc[row['start_timestamp']:row['end_timestamp'], 'pred_context'] = df_pred_ts.loc[
                                                                                              row['start_timestamp']:
                                                                                              row['end_timestamp'],
                                                                                              'pred_context'].apply(
                    lambda x: (x + cluster_labels[row['cluster_id']]) if x is not None else cluster_labels[
                        row['cluster_id']])


            # Merge timestamped ground truth and predictions for this id
            # (pd.merge default: inner join, so only timestamps present in both survive).
            df_compiled_results_ts_id = pd.merge(df_pred_ts.reset_index(), df_gt_ts.reset_index(), on='timestamp')
            df_compiled_results_ts_id['id'] = key
            df_compiled_results_ts_id = df_compiled_results_ts_id[
                ['id', 'timestamp', 'gt_context', 'pred_context']]
            # Drop timestamps with missing ground truth or not covered by any prediction.
            df_compiled_results_ts_id = df_compiled_results_ts_id[~df_compiled_results_ts_id.gt_context.isnull()]
            df_compiled_results_ts_id = df_compiled_results_ts_id[~df_compiled_results_ts_id.pred_context.isnull()]
            # Attach the ontology predictions for the same (id, timestamp).
            df_compiled_results_ts_id = pd.merge(df_compiled_results_ts_id,df_onto_pred_id,on =['id','timestamp'],suffixes=('','_onto'))
            # Combined prediction = cluster labels followed by ontology labels.
            df_compiled_results_ts_id['combined_context'] = df_compiled_results_ts_id.apply(lambda row: row['pred_context']+row['tao_prediction'],axis=1)

            # Format each label collection as a sorted, de-duplicated, comma-separated string.
            df_compiled_results_ts_id['onto_context'] = df_compiled_results_ts_id['tao_prediction'].apply(
                lambda x: ','.join(sorted(np.unique(x).tolist())))
            df_compiled_results_ts_id['pred_context'] = df_compiled_results_ts_id['pred_context'].apply(
                lambda x: ','.join(sorted(np.unique(x).tolist())))
            df_compiled_results_ts_id['combined_context'] = df_compiled_results_ts_id['combined_context'].apply(
                lambda x: ','.join(sorted(np.unique(x).tolist())))

            # Remove timestamps that fall between ground-truth intervals (empty gt string).
            df_compiled_results_ts_id= df_compiled_results_ts_id[~(df_compiled_results_ts_id.gt_context=='')]
            #todo: added to deactivate ontology
            # df_compiled_results_ts_id['onto_context'] = df_compiled_results_ts_id['pred_context']
            # df_compiled_results_ts_id['combined_context'] = df_compiled_results_ts_id['pred_context']

            compiled_ts_results_dict[key] = df_compiled_results_ts_id
            print(f"Processed for id {key}")
    return compiled_ts_results_dict


def find_all_present_contexts(df_gt_ts):
    '''
    Collect every context label occurring in the ground truth or the predictions.

    :param df_gt_ts: frame with ``gt_context`` and ``pred_context`` columns whose
        cells are comma-separated strings of context labels
    :type df_gt_ts: pandas.DataFrame
    :return: the distinct, non-empty labels in sorted order
    :rtype: list[str]
    '''
    context_set = set()
    for column in ('gt_context', 'pred_context'):
        for context in df_gt_ts[column].unique():
            context_set.update(context.split(","))

    # discard (not remove): the empty label only appears when some cell is empty
    # or has a stray comma, and its absence must not raise KeyError.
    context_set.discard('')
    return sorted(context_set)

def get_overall_metrics(gt_arr, pred_arr):
    """Compute overall multilabel metrics, each as a percentage rounded to 2 decimals.

    :param gt_arr: ground-truth label matrix; the caller builds it with
        ``np.stack`` from per-timestamp 0/1 context vectors (rows = samples)
    :param pred_arr: predicted label matrix with the same layout as ``gt_arr``
    :return: tuple ``(f1, precision, recall, jaccard)`` where F1, precision and
        recall use ``average='weighted'`` and Jaccard uses ``average='samples'``;
        undefined ratios count as 0 (``zero_division=0``)
    """

    def _as_percent(score):
        # Same scaling/rounding for every reported metric.
        return round(score * 100, 2)

    jc_score_samples = _as_percent(jaccard_score(gt_arr, pred_arr, average='samples', zero_division=0))
    f1_acc = _as_percent(f1_score(gt_arr, pred_arr, average='weighted', zero_division=0))
    avg_prec = _as_percent(precision_score(gt_arr, pred_arr, average='weighted', zero_division=0))
    avg_recall = _as_percent(recall_score(gt_arr, pred_arr, average='weighted', zero_division=0))
    return f1_acc, avg_prec, avg_recall, jc_score_samples


# Loop over experiments to create the overall results CSVs

In [11]:

# Directory holding one sub-directory per experiment.
gt_cache_dir = "../../cache/sep12_rw/"
experiment_dirs = glob.glob(f'{gt_cache_dir}/*')

# Ontology-only predictions: ';'-separated labels per (session, timestamp).
df_gt_onto_pred = pd.read_csv("../../ontological_models/real_world_ontology_predictions.csv")
# Split the label string into a list; missing values become ['Unknown'].
df_gt_onto_pred['tao_prediction'] = df_gt_onto_pred['tao_prediction'].apply(
    lambda x: x.split(";") if not (str(x) == 'nan') else ['Unknown'])
# A leading 'noevent' label is treated as 'Unknown' as well.
df_gt_onto_pred['tao_prediction'] = df_gt_onto_pred['tao_prediction'].apply(
    lambda x: ['Unknown'] if (x[0]=='noevent') else x)
df_gt_onto_pred = df_gt_onto_pred.rename(columns={'session_id':'id'})
print(df_gt_onto_pred.head())

df_metrics = None
for experiment_dir in experiment_dirs:
    experiment = os.path.basename(experiment_dir)
    experiment_out_dir = f'{out_result_dir}/{experiment}'
    os.makedirs(experiment_out_dir, exist_ok=True)

    printm(f"## --------------------------- Started Experiment: {experiment} ----------------------------")

    # Load the most recent (lexicographically last) summary and prediction files.
    printm("### fetch results for experiment")
    result_file = sorted(glob.glob(f"{gt_cache_dir}/{experiment}/results/{result_summary_prefix}*.json"))[-1]
    ts_file = sorted(glob.glob(f"{gt_cache_dir}/{experiment}/results/{ts_prediction_file_prefix}*.pb"))[-1]
    print(result_file)
    with open(result_file, 'r') as result_fh:
        exp_results = json.load(result_fh)
    # NOTE(security): pickle.load can execute arbitrary code; only load files
    # produced by your own trusted experiment runs.
    with open(ts_file, 'rb') as ts_fh:
        ts_results = pickle.load(ts_fh)
    cluster_centers = exp_results['direct_labels']
    dataset = exp_results['run_config']['dataset']

    # Keep only test instances; drop sessions that have none left.
    zero_shape_ids = []
    for session_id in ts_results.keys():
        ts_results[session_id] = ts_results[session_id][ts_results[session_id].isTrain == False]
        if ts_results[session_id].shape[0] == 0:
            zero_shape_ids.append(session_id)
    for session_id in zero_shape_ids:
        del ts_results[session_id]

    printm("### get cluster labels and best representation accuracy")
    direct_cluster_centers = exp_results['direct_labels']
    direct_cluster_labels = get_cluster_labels(direct_cluster_centers, dataset, label_context_v1)
    decoded_cluster_centers = exp_results['decoded_labels']
    decoded_cluster_labels = get_cluster_labels(decoded_cluster_centers, dataset, label_context_v1)
    # Prefer the decoded label of a cluster; fall back to the direct one when it is empty.
    cluster_labels = []
    for idx in range(len(decoded_cluster_labels)):
        if len(decoded_cluster_labels[idx]) > 0:
            cluster_labels.append(decoded_cluster_labels[idx])
        else:
            cluster_labels.append(direct_cluster_labels[idx])
    df_cluster_merge = pd.DataFrame(
        np.array([[f"{idx}:"+','.join(cr) for idx, cr in enumerate(direct_cluster_labels)],
                  [f"{idx}:"+','.join(cr) for idx, cr in enumerate(decoded_cluster_labels)],
                  [f"{idx}:"+','.join(cr) for idx, cr in enumerate(cluster_labels)]]).T,
        columns=['direct', 'decoded', 'combination_1'])
    df_cluster_merge.to_csv(f"{experiment_out_dir}/cluster_merge.csv")
    representation_acc = exp_results['repr_training_metrics'][-1]
    with open(f"{experiment_out_dir}/representation_accuracy.json", "w") as out_fh:
        json.dump(representation_acc, out_fh)

    printm("### compile GT, Onto and Temporal results together")
    compiled_results_ts_dict  = compile_ts_results_v2(ts_results, df_gt_onto_pred,df_gt, cluster_labels)

    # Collect every context label seen in ground truth, temporal and ontology predictions.
    printm("### get all available contexts")
    all_context_list = []
    for session_id in compiled_results_ts_dict.keys():
        df_ts_id = compiled_results_ts_dict[session_id]
        all_context_list += np.unique(df_ts_id['gt_context'].values).tolist()
        all_context_list += np.unique(df_ts_id['pred_context'].values).tolist()
        all_context_list += np.unique(df_ts_id['onto_context'].values).tolist()
    all_context_list = np.unique(all_context_list).tolist()
    all_context_list = [xr.split(",") for xr in all_context_list]
    all_context_list = sorted(np.unique(np.concatenate(all_context_list)).tolist())
    # '' and 'Unknown' are not real contexts and get no vector position.
    if '' in all_context_list:
        all_context_list.remove('')
    if 'Unknown' in all_context_list:
        all_context_list.remove('Unknown')
    with open(f"{experiment_out_dir}/all_contexts.json", "w") as out_fh:
        json.dump(all_context_list, out_fh)

    # Encode every context string as a 0/1 vector over all_context_list.
    printm("### get timestamp level metrics")
    metric_columns = ['gt_vec','onto_pred_vec','tp_pred_vec','combined_pred_vec']
    for id_key in compiled_results_ts_dict.keys():
        dft = compiled_results_ts_dict[id_key]
        dft['gt_vec'] = dft.apply(lambda row: get_ctx_vec(row['gt_context'], all_context_list),axis=1)
        dft['onto_pred_vec'] = dft.apply(lambda row: get_ctx_vec(row['onto_context'], all_context_list),axis=1)
        dft['tp_pred_vec'] = dft.apply(lambda row: get_ctx_vec(row['pred_context'], all_context_list),axis=1)
        dft['combined_pred_vec'] = dft.apply(lambda row: get_ctx_vec(row['combined_context'], all_context_list),axis=1)

    ts_metrics = np.vstack([compiled_results_ts_dict[key][metric_columns].values
                            for key in compiled_results_ts_dict.keys()])
    df_ts_metrics = pd.DataFrame(ts_metrics,columns=metric_columns)
    with open(f"{experiment_out_dir}/compiled_results.pb", "wb") as out_fh:
        pickle.dump(compiled_results_ts_dict, out_fh)

    # Overall accuracy metrics across all timestamps of all sessions.
    printm("### get overall accuracy metrics")
    gt_ts_arr = np.stack(df_ts_metrics['gt_vec'].values)
    onto_ts_arr = np.stack(df_ts_metrics['onto_pred_vec'].values)
    tp_ts_arr = np.stack(df_ts_metrics['tp_pred_vec'].values)
    combined_ts_arr = np.stack(df_ts_metrics['combined_pred_vec'].values)
    onto_ts_metrics = get_overall_metrics(gt_ts_arr, onto_ts_arr)
    tp_ts_metrics = get_overall_metrics(gt_ts_arr, tp_ts_arr)
    combined_ts_metrics = get_overall_metrics(gt_ts_arr,combined_ts_arr)
    df_overall_ts_metrics = pd.DataFrame([onto_ts_metrics,tp_ts_metrics,combined_ts_metrics],columns=['F1','PPV','TPR','JC'],index=['onto','temporal','combined'])
    df_overall_ts_metrics.to_csv(f"{experiment_out_dir}/overall_metrics.csv")
    print(df_overall_ts_metrics)

    # Per-context precision (PPV) and recall (TPR) for each prediction source.
    printm("### get context level accuracy metrics")
    df_context_ts_metrics = pd.DataFrame(all_context_list, columns=['context'])
    df_context_ts_metrics['ppv_onto'] = precision_score(gt_ts_arr, onto_ts_arr, average=None,zero_division=0).round(4)*100
    df_context_ts_metrics['tpr_onto'] = recall_score(gt_ts_arr, onto_ts_arr, average=None,zero_division=0).round(4)*100
    df_context_ts_metrics['ppv_temporal'] = precision_score(gt_ts_arr, tp_ts_arr, average=None,zero_division=0).round(4)*100
    df_context_ts_metrics['tpr_temporal'] = recall_score(gt_ts_arr, tp_ts_arr, average=None,zero_division=0).round(4)*100
    df_context_ts_metrics['ppv_combined'] = precision_score(gt_ts_arr, combined_ts_arr, average=None,zero_division=0).round(4)*100
    df_context_ts_metrics['tpr_combined'] = recall_score(gt_ts_arr, combined_ts_arr, average=None,zero_division=0).round(4)*100
    df_context_ts_metrics.to_csv(f"{experiment_out_dir}/context_metrics.csv")
    print(df_context_ts_metrics)

    printm(f"## Finished Experiment {experiment}")




      id   timestamp activity tao_prediction
0  p10_1  1662667543  noevent      [Unknown]
1  p10_1  1662667544  noevent      [Unknown]
2  p10_1  1662667545  noevent      [Unknown]
3  p10_1  1662667546  noevent      [Unknown]
4  p10_1  1662667547  noevent      [Unknown]


## --------------------------- Started Experiment: p1_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p1_dataset_0.05_0.01_TAE/results/results_summary_20220913_101014.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p10_1
Processed for id p10_1
Starting on p10_2
Processed for id p10_2
Starting on p10_3
Processed for id p10_3
Starting on p10_4
Processed for id p10_4
Starting on p1_1
Processed for id p1_1
Starting on p1_2
Processed for id p1_2
Starting on p1_3
Processed for id p1_3
Starting on p1_4
Processed for id p1_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      71.81  88.48  61.00  65.57
temporal  72.42  69.77  78.50  66.23
combined  73.31  68.54  82.28  65.91


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     60.56         94.67        100.00   
1       Exercising    100.00     93.53         94.97        100.00   
2       HavingMeal    100.00     71.11         76.77         88.15   
3        HouseWork    100.00     69.44         58.20         71.83   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     70.03         86.02         84.20   
7        PhoneCall     53.95     33.47         68.95         61.63   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     51.40         35.65         93.46   

   ppv_combined  tpr_combined  
0         94.67        100.00  
1         94.97        100.00  
2         76.77         88.15  
3         65.70         98.81  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p1_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p3_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p3_dataset_0.05_0.01_TAE/results/results_summary_20220913_101153.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p3_1
Processed for id p3_1
Starting on p3_2
Processed for id p3_2
Starting on p3_3
Processed for id p3_3
Starting on p3_4
Processed for id p3_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      68.16  87.32  56.48  61.28
temporal  70.65  71.44  75.07  63.72
combined  70.74  68.66  77.94  63.64


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     56.41         97.50        100.00   
1       Exercising    100.00     93.42         66.09        100.00   
2       HavingMeal    100.00     62.50         78.89         80.68   
3        HouseWork    100.00     65.69         46.90         77.37   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     66.95         89.80         78.45   
7        PhoneCall     60.23     32.32         90.74         59.76   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     48.96         40.68        100.00   

   ppv_combined  tpr_combined  
0         97.50        100.00  
1         66.09        100.00  
2         78.89         80.68  
3         52.94         98.54  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p3_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p10_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p10_dataset_0.05_0.01_TAE/results/results_summary_20220913_101034.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p10_1
Processed for id p10_1
Starting on p10_2
Processed for id p10_2
Starting on p10_3
Processed for id p10_3
Starting on p10_4
Processed for id p10_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      73.02  87.43  63.33  68.25
temporal  72.58  67.91  79.22  68.86
combined  71.42  65.52  79.22  66.71


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     74.36        100.00         97.44   
1       Exercising    100.00     94.32         93.62        100.00   
2       HavingMeal    100.00     80.33         75.36         85.25   
3        HouseWork    100.00     76.12         72.83        100.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     71.15         69.48         81.73   
7        PhoneCall     52.27     35.11         68.47         58.02   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     50.50         52.15         84.16   

   ppv_combined  tpr_combined  
0        100.00         97.44  
1         93.62        100.00  
2         75.36         85.25  
3         72.83        100.00  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p10_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p5_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p5_dataset_0.05_0.01_TAE/results/results_summary_20220913_101232.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p5_1
Processed for id p5_1
Starting on p5_2
Processed for id p5_2
Starting on p5_3
Processed for id p5_3
Starting on p5_4
Processed for id p5_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      72.32  88.70  61.79  66.00
temporal  74.16  75.12  77.02  70.33
combined  73.11  71.30  77.26  69.11


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     90.62         94.12        100.00   
1       Exercising    100.00     93.41        100.00         98.90   
2       HavingMeal    100.00     65.71         85.07         81.43   
3        HouseWork    100.00     78.38         54.04         96.40   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     64.24         84.56         79.86   
7        PhoneCall     51.56     32.04         89.09         47.57   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     54.00         55.03         82.00   

   ppv_combined  tpr_combined  
0         94.12        100.00  
1        100.00         98.90  
2         85.07         81.43  
3         54.04         96.40  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p5_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p7_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p7_dataset_0.05_0.01_TAE/results/results_summary_20220913_101350.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p7_1
Processed for id p7_1
Starting on p7_2
Processed for id p7_2
Starting on p7_3
Processed for id p7_3
Starting on p7_4
Processed for id p7_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      71.43  88.45  60.77  65.47
temporal  77.45  76.69  81.14  73.63
combined  76.11  73.81  81.14  70.21


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     70.59         94.44        100.00   
1       Exercising    100.00     93.90        100.00        100.00   
2       HavingMeal    100.00     65.22         82.19         86.96   
3        HouseWork    100.00     79.28         62.71        100.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     69.02         89.43         79.80   
7        PhoneCall     53.12     27.87         88.00         72.13   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     48.04         44.50         83.33   

   ppv_combined  tpr_combined  
0         94.44        100.00  
1        100.00        100.00  
2         82.19         86.96  
3         62.71        100.00  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p7_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p2_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p2_dataset_0.05_0.01_TAE/results/results_summary_20220913_101109.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p2_1
Processed for id p2_1
Starting on p2_2
Processed for id p2_2
Starting on p2_3
Processed for id p2_3
Starting on p2_4
Processed for id p2_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      71.05  87.66  60.38  64.50
temporal  68.93  68.09  75.49  60.85
combined  69.72  65.57  79.64  61.08


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     100.0     58.33         92.31        100.00   
1       Exercising     100.0     92.86         65.42        100.00   
2       HavingMeal     100.0     69.66         68.91         92.13   
3        HouseWork     100.0     73.81         39.13         64.29   
4       InAMeeting       0.0      0.00          0.00          0.00   
5  MealPreparation       0.0      0.00          0.00          0.00   
6       OfficeWork     100.0     72.36         89.05         81.57   
7        PhoneCall      49.4     28.08         80.23         47.26   
8    PreparingMeal       0.0      0.00          0.00          0.00   
9         Relaxing     100.0     49.60         42.96        100.00   

   ppv_combined  tpr_combined  
0         92.31        100.00  
1         65.42        100.00  
2         68.91         92.13  
3         49.40         97.62  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p2_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p4_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p4_dataset_0.05_0.01_TAE/results/results_summary_20220913_115140.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p4_1
Processed for id p4_1
Starting on p4_2
Processed for id p4_2
Starting on p4_3
Processed for id p4_3
Starting on p4_4
Processed for id p4_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      70.85  88.64  59.29  63.07
temporal  71.67  71.34  76.60  67.21
combined  71.51  68.57  78.21  66.61


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     51.52         76.74        100.00   
1       Exercising    100.00     91.67         89.55        100.00   
2       HavingMeal    100.00     67.44         68.22         84.88   
3        HouseWork    100.00     68.85         59.80        100.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     65.57         88.28         72.16   
7        PhoneCall     57.14     37.21         75.28         51.94   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     61.16         53.78        100.00   

   ppv_combined  tpr_combined  
0         76.74        100.00  
1         89.55        100.00  
2         68.22         84.88  
3         59.80        100.00  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p4_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p6_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw//p6_dataset_0.05_0.01_TAE/results/results_summary_20220913_100806.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p6_1
Processed for id p6_1
Starting on p6_2
Processed for id p6_2
Starting on p6_3
Processed for id p6_3
Starting on p6_4
Processed for id p6_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      73.19  88.63  63.03  67.36
temporal  66.00  60.65  76.95  59.15
combined  66.32  59.93  79.18  58.64


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn    100.00     75.76         47.83        100.00   
1       Exercising    100.00     92.94         80.39         96.47   
2       HavingMeal     85.71     72.73         43.31         83.33   
3        HouseWork    100.00     80.00         46.89         81.67   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork    100.00     67.48         80.97         76.99   
7        PhoneCall     56.06     28.68         51.88         53.49   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00     59.22         43.46        100.00   

   ppv_combined  tpr_combined  
0         47.83        100.00  
1         80.39         96.47  
2         43.31         83.33  
3         51.53         98.33  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p6_dataset_0.05_0.01_TAE

In [22]:
# Take an independent (deep) working copy of the compiled per-timestamp frame for
# session 'p4_3', so later edits to `df_t` do not touch the entry stored in the dict.
# NOTE(review): `compiled_results_ts_dict` is reassigned on every iteration of the
# experiment loops, so this shows whichever experiment was processed last — confirm
# the intended experiment before relying on the displayed rows.
df_t = compiled_results_ts_dict['p4_3'].copy(deep=True)

Unnamed: 0,id,timestamp,gt_context,pred_context,tao_prediction,combined_context,onto_context
0,p4_3,1.662481e+09,Exercising,Exercising,[HavingMeal],"Exercising,HavingMeal",HavingMeal
1,p4_3,1.662481e+09,Exercising,Exercising,[HavingMeal],"Exercising,HavingMeal",HavingMeal
2,p4_3,1.662481e+09,Exercising,Exercising,[HavingMeal],"Exercising,HavingMeal",HavingMeal
3,p4_3,1.662481e+09,Exercising,Exercising,[HavingMeal],"Exercising,HavingMeal",HavingMeal
4,p4_3,1.662481e+09,Exercising,Exercising,[HavingMeal],"Exercising,HavingMeal",HavingMeal
...,...,...,...,...,...,...,...
288,p4_3,1.662481e+09,OfficeWork,OfficeWork,[HavingMeal],"HavingMeal,OfficeWork",HavingMeal
289,p4_3,1.662481e+09,OfficeWork,OfficeWork,[HavingMeal],"HavingMeal,OfficeWork",HavingMeal
290,p4_3,1.662481e+09,OfficeWork,OfficeWork,[HavingMeal],"HavingMeal,OfficeWork",HavingMeal
291,p4_3,1.662481e+09,OfficeWork,OfficeWork,[HavingMeal],"HavingMeal,OfficeWork",HavingMeal


# Mites Accuracy numbers

In [12]:

# Evaluate every Mites experiment found in the cache directory.
# For each experiment this cell:
#   1. loads the latest results summary (JSON) and timestamped predictions (pickle),
#   2. keeps only test rows and drops sessions left without any,
#   3. derives one label set per cluster (decoded labels, falling back to direct),
#   4. merges ground truth, ontology and temporal predictions per session,
#   5. writes overall and per-context precision/recall tables to
#      `<out_result_dir>/mites_<experiment>/`.
mites_cache_dir = "../../cache/sep12_rw_mites/"
# Sorted so experiments are processed and logged in the same order on every run
# (glob order is filesystem-dependent); stray non-directory entries are ignored.
experiment_dirs = sorted(path for path in glob.glob(f'{mites_cache_dir}/*') if os.path.isdir(path))

df_mites_onto_pred = pd.read_csv("../../ontological_models/real_world_mites_ontology_predictions.csv")
# `tao_prediction` is a ';'-separated string; missing values become ['Unknown'].
df_mites_onto_pred['tao_prediction'] = df_mites_onto_pred['tao_prediction'].apply(
    lambda x: x.split(";") if not (str(x) == 'nan') else ['Unknown'])
# A leading 'noevent' entry is also treated as 'Unknown'.
df_mites_onto_pred['tao_prediction'] = df_mites_onto_pred['tao_prediction'].apply(
    lambda x: ['Unknown'] if (x[0] == 'noevent') else x)
df_mites_onto_pred = df_mites_onto_pred.rename(columns={'session_id': 'id'})
print(df_mites_onto_pred.head())

# Not used in this cell; kept because other cells may reference it (TODO confirm).
df_metrics = None
for experiment_dir in experiment_dirs:
    experiment = os.path.basename(os.path.normpath(experiment_dir))
    experiment_out_dir = f'{out_result_dir}/mites_{experiment}'
    os.makedirs(experiment_out_dir, exist_ok=True)

    printm(f"## --------------------------- Started Experiment: {experiment} ----------------------------")
    # Load the most recent summary / prediction files (file names sort chronologically
    # by their timestamp suffix).
    printm("### fetch results for experiment")
    result_file = sorted(glob.glob(f"{mites_cache_dir}/{experiment}/results/{result_summary_prefix}*.json"))[-1]
    ts_file = sorted(glob.glob(f"{mites_cache_dir}/{experiment}/results/{ts_prediction_file_prefix}*.pb"))[-1]
    print(result_file)
    with open(result_file, 'r') as result_fh:
        exp_results = json.load(result_fh)
    # NOTE: pickle.load can execute arbitrary code; only load caches produced by this project.
    with open(ts_file, 'rb') as ts_fh:
        ts_results = pickle.load(ts_fh)
    cluster_centers = exp_results['direct_labels']
    dataset = exp_results['run_config']['dataset']

    # Keep only test instances, then drop sessions that have no test rows left.
    zero_shape_ids = []
    for session_id in ts_results.keys():
        ts_results[session_id] = ts_results[session_id][ts_results[session_id].isTrain == False]
        if ts_results[session_id].shape[0] == 0:
            zero_shape_ids.append(session_id)
    for session_id in zero_shape_ids:
        del ts_results[session_id]

    printm("### get cluster labels and best representation accuracy")
    direct_cluster_centers = exp_results['direct_labels']
    direct_cluster_labels = get_cluster_labels(direct_cluster_centers, dataset, label_context_v1)
    decoded_cluster_centers = exp_results['decoded_labels']
    decoded_cluster_labels = get_cluster_labels(decoded_cluster_centers, dataset, label_context_v1)
    # Prefer the decoded labels of a cluster; fall back to its direct labels when empty.
    cluster_labels = [
        decoded if len(decoded) > 0 else direct_cluster_labels[idx]
        for idx, decoded in enumerate(decoded_cluster_labels)
    ]
    # One row per cluster, formatted as "<cluster index>:<comma-joined labels>".
    df_cluster_merge = pd.DataFrame({
        column: [f"{idx}:" + ','.join(labels) for idx, labels in enumerate(label_set)]
        for column, label_set in (('direct', direct_cluster_labels),
                                  ('decoded', decoded_cluster_labels),
                                  ('combination_1', cluster_labels))
    })
    df_cluster_merge.to_csv(f"{experiment_out_dir}/cluster_merge.csv")
    representation_acc = exp_results['repr_training_metrics'][-1]
    with open(f"{experiment_out_dir}/representation_accuracy.json", "w") as acc_fh:
        json.dump(representation_acc, acc_fh)

    printm("### compile GT, Onto and Temporal results together")
    compiled_results_ts_dict = compile_ts_results_v2(ts_results, df_mites_onto_pred, df_gt, cluster_labels)

    # Collect every individual context name seen in GT, temporal or ontology columns.
    printm("### get all available contexts")
    context_names = set()
    for df_ts_id in compiled_results_ts_dict.values():
        for context_column in ('gt_context', 'pred_context', 'onto_context'):
            for joined_contexts in np.unique(df_ts_id[context_column].values).tolist():
                # A cell may hold several comma-separated contexts.
                context_names.update(joined_contexts.split(","))
    # Empty and 'Unknown' entries are not real contexts and are excluded from the metrics.
    all_context_list = sorted(context_names - {'', 'Unknown'})
    with open(f"{experiment_out_dir}/all_contexts.json", "w") as ctx_fh:
        json.dump(all_context_list, ctx_fh)

    # Encode each context column as a vector over `all_context_list` (via get_ctx_vec).
    printm("### get timestamp level metrics")
    vec_sources = {
        'gt_vec': 'gt_context',
        'onto_pred_vec': 'onto_context',
        'tp_pred_vec': 'pred_context',
        'combined_pred_vec': 'combined_context',
    }
    metric_columns = list(vec_sources)
    for dft in compiled_results_ts_dict.values():
        for vec_column, context_column in vec_sources.items():
            dft[vec_column] = dft.apply(
                lambda row, src=context_column: get_ctx_vec(row[src], all_context_list), axis=1)

    # Stack the vector columns of all sessions into one frame.
    ts_metrics = np.vstack([df_ts[metric_columns].values for df_ts in compiled_results_ts_dict.values()])
    df_ts_metrics = pd.DataFrame(ts_metrics, columns=metric_columns)
    with open(f"{experiment_out_dir}/compiled_results.pb", "wb") as compiled_fh:
        pickle.dump(compiled_results_ts_dict, compiled_fh)

    # Overall accuracy metrics across all timestamps.
    printm("### get overall accuracy metrics")
    gt_ts_arr = np.stack(df_ts_metrics['gt_vec'].values)
    onto_ts_arr = np.stack(df_ts_metrics['onto_pred_vec'].values)
    tp_ts_arr = np.stack(df_ts_metrics['tp_pred_vec'].values)
    combined_ts_arr = np.stack(df_ts_metrics['combined_pred_vec'].values)
    onto_ts_metrics = get_overall_metrics(gt_ts_arr, onto_ts_arr)
    tp_ts_metrics = get_overall_metrics(gt_ts_arr, tp_ts_arr)
    combined_ts_metrics = get_overall_metrics(gt_ts_arr, combined_ts_arr)
    df_overall_ts_metrics = pd.DataFrame([onto_ts_metrics, tp_ts_metrics, combined_ts_metrics],
                                         columns=['F1', 'PPV', 'TPR', 'JC'],
                                         index=['onto', 'temporal', 'combined'])
    df_overall_ts_metrics.to_csv(f"{experiment_out_dir}/overall_metrics.csv")
    print(df_overall_ts_metrics)

    # Per-context precision (ppv) and recall (tpr), in percent, for each predictor.
    printm("### get context level accuracy metrics")
    df_context_ts_metrics = pd.DataFrame(all_context_list, columns=['context'])
    for predictor, pred_ts_arr in (('onto', onto_ts_arr),
                                   ('temporal', tp_ts_arr),
                                   ('combined', combined_ts_arr)):
        df_context_ts_metrics[f'ppv_{predictor}'] = precision_score(
            gt_ts_arr, pred_ts_arr, average=None, zero_division=0).round(4) * 100
        df_context_ts_metrics[f'tpr_{predictor}'] = recall_score(
            gt_ts_arr, pred_ts_arr, average=None, zero_division=0).round(4) * 100
    df_context_ts_metrics.to_csv(f"{experiment_out_dir}/context_metrics.csv")
    print(df_context_ts_metrics)

    printm(f"## Finished Experiment {experiment}")




      id   timestamp           activity tao_prediction
0  p10_1  1662667548            talking   [InAMeeting]
1  p10_1  1662667549            talking   [InAMeeting]
2  p10_1  1662667556            talking   [InAMeeting]
3  p10_1  1662667557  doorknock,talking      [Unknown]
4  p10_1  1662667558          doorknock     [ComingIn]


## --------------------------- Started Experiment: p1_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p1_dataset_0.05_0.01_TAE/results/results_summary_20220913_201526.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p10_1
Processed for id p10_1
Starting on p10_2
Processed for id p10_2
Starting on p10_3
Processed for id p10_3
Starting on p10_4
Processed for id p10_4
Starting on p1_1
Processed for id p1_1
Starting on p1_2
Processed for id p1_2
Starting on p1_3
Processed for id p1_3
Starting on p1_4
Processed for id p1_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      21.25  36.77  15.52  16.25
temporal  28.32  28.05  29.85  20.16
combined  32.54  33.65  34.22  21.99


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     27.45     48.28         16.81         68.97   
1       Exercising     36.54     22.89         19.35          7.23   
2       HavingMeal     16.67      7.50         15.28         13.75   
3        HouseWork     11.76      3.26          0.00          0.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     55.19     21.79         55.22         56.92   
7        PhoneCall     17.65      5.59          0.00          0.00   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing     64.62     24.28         45.41         51.45   

   ppv_combined  tpr_combined  
0         16.30         75.86  
1         32.84         26.51  
2         15.28         13.75  
3         11.76          3.26  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p1_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p3_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p3_dataset_0.05_0.01_TAE/results/results_summary_20220913_203413.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p3_1
Processed for id p3_1
Starting on p3_2
Processed for id p3_2
Starting on p3_3
Processed for id p3_3
Starting on p3_4
Processed for id p3_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      18.26  34.87  13.64  15.90
temporal  30.79  45.54  32.31  25.28
combined  31.99  37.74  33.44  25.10


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     17.39     30.77         36.84         53.85   
1       Exercising      8.33      6.06         10.77         21.21   
2       HavingMeal     20.59     14.00         13.73         14.00   
3        HouseWork     42.11      8.51        100.00          5.32   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     54.21     27.62         58.94         73.81   
7        PhoneCall     15.00      2.88         18.33         10.58   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing     50.00      3.28         46.67         11.48   

   ppv_combined  tpr_combined  
0         21.21         53.85  
1         10.45         21.21  
2         13.73         14.00  
3         52.17         12.77  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p3_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p10_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p10_dataset_0.05_0.01_TAE/results/results_summary_20220913_201905.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p10_1
Processed for id p10_1
Starting on p10_2
Processed for id p10_2
Starting on p10_3
Processed for id p10_3
Starting on p10_4
Processed for id p10_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      18.52  30.93  14.06  15.62
temporal  20.83  21.61  23.65  15.17
combined  24.51  25.01  27.98  16.85


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     35.71     66.67          0.00          0.00   
1       Exercising     41.67     26.79         23.44         26.79   
2       HavingMeal     16.67     11.36         14.29         15.91   
3        HouseWork      8.70      4.12          0.00          0.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     41.67     16.43         35.38         21.60   
7        PhoneCall     18.42      7.45         19.31         53.19   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing     57.69     17.44         31.25         40.70   

   ppv_combined  tpr_combined  
0         35.71         66.67  
1         30.67         41.07  
2         14.00         15.91  
3          8.70          4.12  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p10_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p5_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p5_dataset_0.05_0.01_TAE/results/results_summary_20220913_203508.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p5_1
Processed for id p5_1
Starting on p5_2
Processed for id p5_2
Starting on p5_3
Processed for id p5_3
Starting on p5_4
Processed for id p5_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      25.00  55.88  18.09  18.61
temporal  31.36  50.99  32.80  19.47
combined  32.58  46.42  34.39  19.74


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn      9.76     57.14          5.98        100.00   
1       Exercising     61.54     18.60         56.41         51.16   
2       HavingMeal     78.12     52.08         30.30         83.33   
3        HouseWork     80.00      4.76        100.00          2.38   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     52.83     16.47         45.16         32.94   
7        PhoneCall     42.31     16.92         25.35         27.69   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing     55.00     18.97         68.97         34.48   

   ppv_combined  tpr_combined  
0          5.47        100.00  
1         53.66         51.16  
2         30.30         83.33  
3         80.00          4.76  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p5_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p7_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p7_dataset_0.05_0.01_TAE/results/results_summary_20220913_203932.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p7_1
Processed for id p7_1
Starting on p7_2
Processed for id p7_2
Starting on p7_3
Processed for id p7_3
Starting on p7_4
Processed for id p7_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      25.86  39.69  20.81  23.02
temporal  29.91  28.63  33.97  21.57
combined  32.57  32.08  37.58  22.63


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     26.32     41.67          6.49        100.00   
1       Exercising     91.43     80.00         64.71         82.50   
2       HavingMeal      0.00      0.00          3.33          3.85   
3        HouseWork     14.29      2.56          0.00          0.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     55.70     30.56         56.25         56.25   
7        PhoneCall     24.00      7.50         13.11         10.00   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing     64.29     15.79         27.78         43.86   

   ppv_combined  tpr_combined  
0          6.45        100.00  
1         68.97        100.00  
2          3.33          3.85  
3         14.29          2.56  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p7_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p2_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p2_dataset_0.05_0.01_TAE/results/results_summary_20220913_203346.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p2_1
Processed for id p2_1
Starting on p2_2
Processed for id p2_2
Starting on p2_3
Processed for id p2_3
Starting on p2_4
Processed for id p2_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      13.22  44.05   8.49  10.40
temporal  28.42  25.84  33.08  17.42
combined  28.74  25.94  33.96  16.78


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     19.05     20.00         13.16         50.00   
1       Exercising     10.34      8.11          0.00          0.00   
2       HavingMeal      0.00      0.00          7.08         10.39   
3        HouseWork      3.85      0.91         12.70         14.55   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     69.33     18.64         50.32         56.63   
7        PhoneCall     25.00      2.46         17.74         18.03   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00      4.27         19.54         43.59   

   ppv_combined  tpr_combined  
0         12.90         60.00  
1          4.00          8.11  
2          7.08         10.39  
3         11.51         14.55  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p2_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p4_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p4_dataset_0.05_0.01_TAE/results/results_summary_20220913_203419.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p4_1
Processed for id p4_1
Starting on p4_2
Processed for id p4_2
Starting on p4_3
Processed for id p4_3
Starting on p4_4
Processed for id p4_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      12.07  45.46   9.14  10.65
temporal  31.62  33.80  32.49  19.57
combined  32.38  35.45  33.50  19.42


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     14.71     55.56          8.74        100.00   
1       Exercising     21.82     37.50         22.22         68.75   
2       HavingMeal      0.00      0.00          1.69          2.08   
3        HouseWork     11.11      2.38          0.00          0.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     64.71     10.05         61.75         51.60   
7        PhoneCall     33.33     15.07         27.63         28.77   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing    100.00      2.35         41.94         30.59   

   ppv_combined  tpr_combined  
0          8.18        100.00  
1         23.15         78.12  
2          1.69          2.08  
3         11.11          2.38  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p4_dataset_0.05_0.01_TAE

## --------------------------- Started Experiment: p6_dataset_0.05_0.01_TAE ----------------------------

### fetch results for experiment

../../cache/sep12_rw_mites//p6_dataset_0.05_0.01_TAE/results/results_summary_20220913_203859.json


### get cluster labels and best representation accuracy

### compile GT, Onto and Temporal results together

Starting on p6_1
Processed for id p6_1
Starting on p6_2
Processed for id p6_2
Starting on p6_3
Processed for id p6_3
Starting on p6_4
Processed for id p6_4


### get all available contexts

### get timestamp level metrics

### get overall accuracy metrics

             F1    PPV    TPR     JC
onto      25.10  40.52  20.35  23.25
temporal  38.15  34.40  46.10  34.48
combined  37.89  33.76  46.32  33.28


### get context level accuracy metrics

           context  ppv_onto  tpr_onto  ppv_temporal  tpr_temporal  \
0         ComingIn     15.15     38.46         16.88        100.00   
1       Exercising     33.33     14.29         41.67         23.81   
2       HavingMeal     22.86     25.00         26.79         46.88   
3        HouseWork      0.00      0.00          0.00          0.00   
4       InAMeeting      0.00      0.00          0.00          0.00   
5  MealPreparation      0.00      0.00          0.00          0.00   
6       OfficeWork     54.00     31.03         56.28         69.54   
7        PhoneCall     36.59     25.00         15.00         30.00   
8    PreparingMeal      0.00      0.00          0.00          0.00   
9         Relaxing     85.71     10.71         42.35         64.29   

   ppv_combined  tpr_combined  
0         15.66        100.00  
1         38.46         23.81  
2         25.42         46.88  
3          0.00          0.00  
4          0.00          0.00  
5          0.00          0.00  
6    

## Finished Experiment p6_dataset_0.05_0.01_TAE