In [7]:
import pandas as pd
import egoviz.models.processing as pr
import egoviz.models.evaluation as ev

from collections import Counter

SEED = 42

In [2]:
# if df pkl exists, load it
try:
    df = pd.read_pickle(r"C:\Users\adesh\Documents\GitHub\EgoVizML\data\home_date_all_preds_df.pkl")
except:
    data = pr.load_pickle(r"C:\Users\adesh\Documents\GitHub\EgoVizML\data\home_date_all_preds.pkl")
    df = pd.DataFrame(columns=['video', 'frame', 'classes', 'active', 'adl'])

    for id, dets in data.items():
        adl = id.split('_', 1)[0]
        video = id.split('_')[1]
        frame = id.split('_')[2]
        classes = dets['remapped_metadata']
        active = dets['active_objects']

        row = {'video': video, 'frame': frame, 'classes': classes, 'adl': adl, 'active': active}

        df.loc[len(df)] = row

    # save df
    df.to_pickle(r"C:\Users\adesh\Documents\GitHub\EgoVizML\data\home_date_all_preds_df.pkl")

df.head()

Unnamed: 0,video,frame,classes,active,adl
0,SCI06-10--12,frame0,"[clothing_accessory, phone_tablet, other, offi...","[False, False, False, True, True, False, False...",communication-management
1,SCI06-10--12,frame147,"[phone_tablet, office_stationary, other, offic...","[False, True, False, True, False, False, False...",communication-management
2,SCI06-10--12,frame196,"[phone_tablet, phone_tablet, other, office_sta...","[False, False, False, True, False, False, Fals...",communication-management
3,SCI06-10--12,frame245,"[phone_tablet, clothing_accessory, other, offi...","[False, False, False, False, True, False, Fals...",communication-management
4,SCI06-10--12,frame294,"[phone_tablet, clothing_accessory, other, offi...","[False, False, False, True, False, False, Fals...",communication-management


In [3]:
def count_occurrences(classes, active):
    class_counts = Counter(classes)
    active_counts = Counter({cls: sum([act and (cls == c) for act, c in zip(active, classes)]) for cls in set(classes)})
    return class_counts, active_counts

# Apply the function to create new columns
df['class_counts'], df['active_counts'] = zip(*df.apply(lambda row: count_occurrences(row['classes'], row['active']), axis=1))

df.head()


Unnamed: 0,video,frame,classes,active,adl,class_counts,active_counts
0,SCI06-10--12,frame0,"[clothing_accessory, phone_tablet, other, offi...","[False, False, False, True, True, False, False...",communication-management,"{'clothing_accessory': 1, 'phone_tablet': 3, '...","{'clothing_accessory': 0, 'office_stationary':..."
1,SCI06-10--12,frame147,"[phone_tablet, office_stationary, other, offic...","[False, True, False, True, False, False, False...",communication-management,"{'phone_tablet': 2, 'office_stationary': 3, 'o...","{'clothing_accessory': 0, 'furnishing': 0, 'of..."
2,SCI06-10--12,frame196,"[phone_tablet, phone_tablet, other, office_sta...","[False, False, False, True, False, False, Fals...",communication-management,"{'phone_tablet': 2, 'other': 1, 'office_statio...","{'clothing_accessory': 0, 'furnishing': 0, 'of..."
3,SCI06-10--12,frame245,"[phone_tablet, clothing_accessory, other, offi...","[False, False, False, False, True, False, Fals...",communication-management,"{'phone_tablet': 2, 'clothing_accessory': 1, '...","{'clothing_accessory': 0, 'office_stationary':..."
4,SCI06-10--12,frame294,"[phone_tablet, clothing_accessory, other, offi...","[False, False, False, True, False, False, Fals...",communication-management,"{'phone_tablet': 2, 'clothing_accessory': 1, '...","{'clothing_accessory': 0, 'office_stationary':..."


In [4]:
# Create a new DataFrame from class_counts and active_counts
counts_df = pd.DataFrame(df.apply(lambda row: {'adl': row['adl'], 'video': row['video'], **{f'count_{key}': value for key, value in row['class_counts'].items()}, **{f'active_{key}': value for key, value in row['active_counts'].items()}}, axis=1).tolist())

# Group by video and sum the values for each video
grouped_counts_df = counts_df.groupby('video').agg({**{'adl': 'first'}, **{col: 'sum' for col in counts_df.columns if col not in ['adl', 'video']}})

In [5]:
grouped_counts_df = grouped_counts_df.reset_index(); grouped_counts_df.head()

Unnamed: 0,video,adl,count_clothing_accessory,count_phone_tablet,count_other,count_office_stationary,count_footwear,count_furniture,active_clothing_accessory,active_office_stationary,...,count_house_fixtures,active_house_fixtures,count_tableware,active_tableware,count_bathroom_fixture,active_bathroom_fixture,count_plant,active_plant,count_hat,active_hat
0,SCI02-1--1,functional-mobility,0.0,6.0,7.0,13.0,3.0,10.0,0.0,0.0,...,20.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0
1,SCI02-1--10,meal-preparation-cleanup,2.0,0.0,6.0,2.0,1.0,2.0,0.0,0.0,...,17.0,0.0,18.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0
2,SCI02-1--11,meal-preparation-cleanup,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,17.0,0.0,25.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0
3,SCI02-1--12,meal-preparation-cleanup,0.0,0.0,8.0,3.0,0.0,4.0,0.0,0.0,...,15.0,0.0,21.0,0.0,7.0,0.0,1.0,0.0,0.0,0.0
4,SCI02-1--2,meal-preparation-cleanup,10.0,4.0,6.0,2.0,1.0,2.0,0.0,0.0,...,35.0,1.0,8.0,0.0,11.0,0.0,3.0,0.0,0.0,0.0


In [6]:
df_no_active = grouped_counts_df.drop(columns=[col for col in grouped_counts_df.columns if 'active' in col]); df_no_active.head()

Unnamed: 0,video,adl,count_clothing_accessory,count_phone_tablet,count_other,count_office_stationary,count_footwear,count_furniture,count_furnishing,count_drinkware,...,count_musical_instrument,count_sink,count_cabinetry,count_kitchen_appliance,count_tv_computer,count_house_fixtures,count_tableware,count_bathroom_fixture,count_plant,count_hat
0,SCI02-1--1,functional-mobility,0.0,6.0,7.0,13.0,3.0,10.0,8.0,19.0,...,0.0,6.0,15.0,7.0,0.0,20.0,6.0,4.0,0.0,0.0
1,SCI02-1--10,meal-preparation-cleanup,2.0,0.0,6.0,2.0,1.0,2.0,3.0,18.0,...,0.0,15.0,1.0,3.0,0.0,17.0,18.0,6.0,0.0,0.0
2,SCI02-1--11,meal-preparation-cleanup,1.0,0.0,2.0,0.0,0.0,0.0,1.0,7.0,...,0.0,18.0,0.0,0.0,0.0,17.0,25.0,13.0,0.0,0.0
3,SCI02-1--12,meal-preparation-cleanup,0.0,0.0,8.0,3.0,0.0,4.0,2.0,28.0,...,0.0,14.0,10.0,2.0,0.0,15.0,21.0,7.0,1.0,0.0
4,SCI02-1--2,meal-preparation-cleanup,10.0,4.0,6.0,2.0,1.0,2.0,1.0,47.0,...,0.0,13.0,19.0,11.0,0.0,35.0,8.0,11.0,3.0,0.0


## Classifiers

In [10]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression

# Random Forest Classifier
rf_clf = RandomForestClassifier(n_estimators=500, random_state=SEED)

# Gradient Boosting Classifier
gb_clf = GradientBoostingClassifier(n_estimators=500, random_state=SEED)

# AdaBoost Classifier
ada_clf = AdaBoostClassifier(n_estimators=500, random_state=SEED)

# Stacking Classifier
stack_clf = StackingClassifier(estimators=[('rf', rf_clf), ('gb', gb_clf), ('ada', ada_clf)], final_estimator=LogisticRegression(random_state=SEED), n_jobs=-1)


## With Active Objects

In [11]:
X = grouped_counts_df.drop(['adl', 'video'], axis=1)
y = grouped_counts_df['adl']
groups = grouped_counts_df['video'].str[:5]

# try all classifiers
rf_active = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, rf_clf)
gb_active = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, gb_clf)
ada_active = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, ada_clf)
stack_active = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, stack_clf)

In [14]:
# print results for each classifier
print('Random Forest Classifier')
rf_active

Random Forest Classifier


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.864583,0.856504,0.382456,0.407556,0.675268,0.600725,0.605631,0.501167
1,SCI03,0.765625,0.70709,0.56192,0.326276,0.675268,0.600725,0.605631,0.501167
2,SCI06,0.631068,0.318251,0.401299,0.910383,0.675268,0.600725,0.605631,0.501167
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.675268,0.600725,0.605631,0.501167
4,SCI10,0.188073,0.419082,0.659632,0.16595,0.675268,0.600725,0.605631,0.501167
5,SCI11,0.752525,0.544485,0.395079,0.540243,0.675268,0.600725,0.605631,0.501167
6,SCI12,0.736364,0.523832,0.607081,0.682419,0.675268,0.600725,0.605631,0.501167
7,SCI13,0.706215,0.56185,0.692364,0.375935,0.675268,0.600725,0.605631,0.501167
8,SCI14,0.657609,0.662307,0.55612,0.435772,0.675268,0.600725,0.605631,0.501167
9,SCI15,0.911765,0.707612,0.647483,0.922805,0.675268,0.600725,0.605631,0.501167


## Without Active Objects

In [12]:
X = df_no_active.drop(['adl', 'video'], axis=1)
y = df_no_active['adl']
groups = df_no_active['video'].str[:5]

# try all classifiers
rf = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, rf_clf)
gb = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, gb_clf)
ada = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, ada_clf)
stack = ev.leave_one_group_out_cv(grouped_counts_df, X, y, groups, stack_clf)

In [16]:
# print results for each classifier
print('Random Forest Classifier')
rf

Random Forest Classifier


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.864583,0.856504,0.382456,0.407556,0.653633,0.597967,0.576903,0.488539
1,SCI03,0.78125,0.711966,0.567183,0.332621,0.653633,0.597967,0.576903,0.488539
2,SCI06,0.61165,0.318805,0.256061,0.768366,0.653633,0.597967,0.576903,0.488539
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.653633,0.597967,0.576903,0.488539
4,SCI10,0.165138,0.401361,0.687899,0.124753,0.653633,0.597967,0.576903,0.488539
5,SCI11,0.717172,0.694306,0.358825,0.372689,0.653633,0.597967,0.576903,0.488539
6,SCI12,0.718182,0.444558,0.531231,0.750156,0.653633,0.597967,0.576903,0.488539
7,SCI13,0.666667,0.552288,0.649264,0.340533,0.653633,0.597967,0.576903,0.488539
8,SCI14,0.521739,0.477771,0.473182,0.514716,0.653633,0.597967,0.576903,0.488539
9,SCI15,0.888235,0.686063,0.607588,0.887887,0.653633,0.597967,0.576903,0.488539
