In [1]:
import os

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
from xgboost import XGBClassifier

import egoviz.models.processing as pr
import egoviz.models.evaluation as ev

# Fixed seed passed as random_state to every classifier built in this notebook.
SEED = 42

In [2]:
# Load the per-frame detections DataFrame. If the cached DataFrame pickle is
# missing, rebuild it from the raw predictions pickle and cache the result.
# NOTE(review): pr.load_pickle presumably unpickles the file -- only point it at
# trusted data, because unpickling can execute arbitrary code.
cwd = os.getcwd()
cache_path = os.path.join(cwd, '../data/home_data_all_preds_df_counts.pkl')

try:
    df = pr.load_pickle(cache_path)

except FileNotFoundError:
    raw_path = os.path.join(cwd, '../data/home_data_all_preds.pkl')
    data = pr.load_pickle(raw_path)

    # Each key is split on '_' into adl (field 0), video (field 1) and
    # frame (field 2). Rows are collected as plain dicts and the frame is built
    # once, which avoids the quadratic cost of enlarging a DataFrame row by row.
    # The loop variable is deliberately not called `id`, so the builtin stays
    # usable in later cells.
    records = []
    for det_id, dets in data.items():
        id_parts = det_id.split('_')
        records.append({
            'video': id_parts[1],
            'frame': id_parts[2],
            'classes': dets['remapped_metadata'],
            'active': dets['active_objects'],
            'adl': id_parts[0],
        })

    df = pd.DataFrame(records, columns=['video', 'frame', 'classes', 'active', 'adl'])

    # Cache the frame so later runs take the fast path above.
    df.to_pickle(cache_path)

In [3]:
# Turn the per-frame detections into the counts table used as model input,
# then preview the first rows.
df_active = pr.generate_counts_df(df)
df_active.head()

Unnamed: 0,video,adl,count_clothing_accessory,count_phone_tablet,count_other,count_office_stationary,count_footwear,count_furniture,active_other,active_furniture,...,count_house_fixtures,active_house_fixtures,count_tableware,active_tableware,count_bathroom_fixture,active_bathroom_fixture,count_plant,active_plant,count_hat,active_hat
0,SCI02-1--1,functional-mobility,0.0,6.0,7.0,13.0,3.0,10.0,0.0,0.0,...,20.0,0.0,6.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0
1,SCI02-1--10,meal-preparation-cleanup,2.0,0.0,6.0,2.0,1.0,2.0,0.0,0.0,...,17.0,0.0,18.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0
2,SCI02-1--11,meal-preparation-cleanup,1.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,...,17.0,0.0,25.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0
3,SCI02-1--12,meal-preparation-cleanup,0.0,0.0,8.0,3.0,0.0,4.0,0.0,0.0,...,15.0,0.0,21.0,0.0,7.0,0.0,1.0,0.0,0.0,0.0
4,SCI02-1--2,meal-preparation-cleanup,10.0,4.0,6.0,2.0,1.0,2.0,0.0,0.0,...,35.0,1.0,8.0,0.0,11.0,0.0,3.0,0.0,0.0,0.0


In [4]:
# Variant of the counts table without active-object features: every column
# whose name contains 'active' is removed.
active_cols = [name for name in df_active.columns if 'active' in name]
df_no_active = df_active.drop(columns=active_cols)
df_no_active.head()

Unnamed: 0,video,adl,count_clothing_accessory,count_phone_tablet,count_other,count_office_stationary,count_footwear,count_furniture,count_furnishing,count_drinkware,...,count_musical_instrument,count_sink,count_cabinetry,count_kitchen_appliance,count_tv_computer,count_house_fixtures,count_tableware,count_bathroom_fixture,count_plant,count_hat
0,SCI02-1--1,functional-mobility,0.0,6.0,7.0,13.0,3.0,10.0,8.0,19.0,...,0.0,6.0,15.0,7.0,0.0,20.0,6.0,4.0,0.0,0.0
1,SCI02-1--10,meal-preparation-cleanup,2.0,0.0,6.0,2.0,1.0,2.0,3.0,18.0,...,0.0,15.0,1.0,3.0,0.0,17.0,18.0,6.0,0.0,0.0
2,SCI02-1--11,meal-preparation-cleanup,1.0,0.0,2.0,0.0,0.0,0.0,1.0,7.0,...,0.0,18.0,0.0,0.0,0.0,17.0,25.0,13.0,0.0,0.0
3,SCI02-1--12,meal-preparation-cleanup,0.0,0.0,8.0,3.0,0.0,4.0,2.0,28.0,...,0.0,14.0,10.0,2.0,0.0,15.0,21.0,7.0,1.0,0.0
4,SCI02-1--2,meal-preparation-cleanup,10.0,4.0,6.0,2.0,1.0,2.0,1.0,47.0,...,0.0,13.0,19.0,11.0,0.0,35.0,8.0,11.0,3.0,0.0


In [5]:
# Min-max scale every feature column of both tables.
# NOTE(review): the scaler is fit on all rows, including the groups that the
# later LOGOCV evaluation holds out -- confirm whether scaling should instead
# be fit inside each fold.
scaler = MinMaxScaler()

# Identifier/label columns are excluded by name rather than by position, so the
# selection does not depend on 'video' and 'adl' being the first two columns.
id_cols = ['video', 'adl']
active_features = [col for col in df_active.columns if col not in id_cols]
no_active_features = [col for col in df_no_active.columns if col not in id_cols]

# Work on copies so the unscaled tables stay available.
df_active_scaled = df_active.copy()
df_no_active_scaled = df_no_active.copy()

# fit_transform refits the scaler on each call, so the two tables are scaled
# independently of each other.
df_active_scaled[active_features] = scaler.fit_transform(df_active[active_features])
df_no_active_scaled[no_active_features] = scaler.fit_transform(df_no_active[no_active_features])


In [6]:
# Preview the scaled table that includes the active-object features.
df_active_scaled.head()

Unnamed: 0,video,adl,count_clothing_accessory,count_phone_tablet,count_other,count_office_stationary,count_footwear,count_furniture,active_other,active_furniture,...,count_house_fixtures,active_house_fixtures,count_tableware,active_tableware,count_bathroom_fixture,active_bathroom_fixture,count_plant,active_plant,count_hat,active_hat
0,SCI02-1--1,functional-mobility,0.0,0.136364,0.058824,0.094203,0.0625,0.079365,0.0,0.0,...,0.108696,0.0,0.038462,0.0,0.076923,0.0,0.0,0.0,0.0,0.0
1,SCI02-1--10,meal-preparation-cleanup,0.042553,0.0,0.05042,0.014493,0.020833,0.015873,0.0,0.0,...,0.092391,0.0,0.115385,0.0625,0.115385,0.0,0.0,0.0,0.0,0.0
2,SCI02-1--11,meal-preparation-cleanup,0.021277,0.0,0.016807,0.0,0.0,0.0,0.0,0.0,...,0.092391,0.0,0.160256,0.0,0.25,0.0,0.0,0.0,0.0,0.0
3,SCI02-1--12,meal-preparation-cleanup,0.0,0.0,0.067227,0.021739,0.0,0.031746,0.0,0.0,...,0.081522,0.0,0.134615,0.0,0.134615,0.0,0.02439,0.0,0.0,0.0
4,SCI02-1--2,meal-preparation-cleanup,0.212766,0.090909,0.05042,0.014493,0.020833,0.015873,0.0,0.0,...,0.190217,0.5,0.051282,0.0,0.211538,0.0,0.073171,0.0,0.0,0.0


In [7]:
# Preview the scaled table without the active-object features.
df_no_active_scaled.head()

Unnamed: 0,video,adl,count_clothing_accessory,count_phone_tablet,count_other,count_office_stationary,count_footwear,count_furniture,count_furnishing,count_drinkware,...,count_musical_instrument,count_sink,count_cabinetry,count_kitchen_appliance,count_tv_computer,count_house_fixtures,count_tableware,count_bathroom_fixture,count_plant,count_hat
0,SCI02-1--1,functional-mobility,0.0,0.136364,0.058824,0.094203,0.0625,0.079365,0.047904,0.093596,...,0.0,0.122449,0.185185,0.189189,0.0,0.108696,0.038462,0.076923,0.0,0.0
1,SCI02-1--10,meal-preparation-cleanup,0.042553,0.0,0.05042,0.014493,0.020833,0.015873,0.017964,0.08867,...,0.0,0.306122,0.012346,0.081081,0.0,0.092391,0.115385,0.115385,0.0,0.0
2,SCI02-1--11,meal-preparation-cleanup,0.021277,0.0,0.016807,0.0,0.0,0.0,0.005988,0.034483,...,0.0,0.367347,0.0,0.0,0.0,0.092391,0.160256,0.25,0.0,0.0
3,SCI02-1--12,meal-preparation-cleanup,0.0,0.0,0.067227,0.021739,0.0,0.031746,0.011976,0.137931,...,0.0,0.285714,0.123457,0.054054,0.0,0.081522,0.134615,0.134615,0.02439,0.0
4,SCI02-1--2,meal-preparation-cleanup,0.212766,0.090909,0.05042,0.014493,0.020833,0.015873,0.005988,0.231527,...,0.0,0.265306,0.234568,0.297297,0.0,0.190217,0.051282,0.211538,0.073171,0.0


In [8]:
# Assemble the model inputs for the run that includes active-object features.

# Target: the 'adl' label of each row, integer-encoded.
label_encoder = LabelEncoder()
y = df_active_scaled['adl']
y_encoded = label_encoder.fit_transform(y)

# Features: every column except the label and the video identifier.
X = df_active_scaled.drop(['adl', 'video'], axis=1)

# Grouping key: the first five characters of the video identifier.
groups = df_active_scaled['video'].str.slice(stop=5)


## Classifiers

In [25]:
# One unfitted estimator per model family and feature set, all seeded with SEED.

# Estimators for the feature set that includes active-object columns
rf_clf = RandomForestClassifier(random_state=SEED)
log_clf = LogisticRegression(random_state=SEED)
xgb_clf = XGBClassifier(random_state=SEED)

# Estimators for the feature set without active-object columns
rf_clf_no_active = RandomForestClassifier(random_state=SEED)
log_clf_no_active = LogisticRegression(random_state=SEED)
xgb_clf_no_active = XGBClassifier(random_state=SEED)

## With Active Objects

In [26]:
# Evaluate every classifier on the feature set that includes active objects.
# The frame passed alongside X is df_active_scaled -- the frame X, y and groups
# were derived from -- rather than the unscaled df_active, so all inputs to
# ev.logocv are mutually consistent (the no-active run passes its scaled frame
# in the same position).
rf_active = ev.logocv(df_active_scaled, X, y_encoded, groups, rf_clf)
log_active = ev.logocv(df_active_scaled, X, y_encoded, groups, log_clf)
xgb_active = ev.logocv(df_active_scaled, X, y_encoded, groups, xgb_clf)

2023-12-03 01:51:53,923 - root - INFO - LOGOCV complete for RandomForestClassifier
2023-12-03 01:51:55,347 - root - INFO - LOGOCV complete for LogisticRegression
2023-12-03 01:52:09,308 - root - INFO - LOGOCV complete for XGBClassifier


In [27]:
# Show the first element of the Random Forest result (with active objects);
# it renders as a table of metrics per left-out group plus the means.
print('Random Forest Classifier')
rf_active[0]

Random Forest Classifier


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.875,0.859924,0.392982,0.416842,0.676481,0.587692,0.607586,0.464253
1,SCI03,0.796875,0.717189,0.572446,0.339048,0.676481,0.587692,0.607586,0.464253
2,SCI06,0.631068,0.399981,0.294156,0.683062,0.676481,0.587692,0.607586,0.464253
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.676481,0.587692,0.607586,0.464253
4,SCI10,0.165138,0.280081,0.6354,0.150137,0.676481,0.587692,0.607586,0.464253
5,SCI11,0.747475,0.687749,0.388286,0.39349,0.676481,0.587692,0.607586,0.464253
6,SCI12,0.718182,0.473541,0.549428,0.774513,0.676481,0.587692,0.607586,0.464253
7,SCI13,0.700565,0.55754,0.684031,0.368876,0.676481,0.587692,0.607586,0.464253
8,SCI14,0.652174,0.659439,0.558359,0.436662,0.676481,0.587692,0.607586,0.464253
9,SCI15,0.894118,0.641302,0.746131,0.435315,0.676481,0.587692,0.607586,0.464253


In [28]:
# Show the first element of the Logistic Regression result (with active objects).
print('Logistic Regression')
log_active[0]

Logistic Regression


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.947917,0.874444,0.530516,0.495804,0.699223,0.566784,0.641015,0.502103
1,SCI03,0.8125,0.718519,0.624326,0.423232,0.699223,0.566784,0.641015,0.502103
2,SCI06,0.582524,0.475987,0.248918,0.63131,0.699223,0.566784,0.641015,0.502103
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.699223,0.566784,0.641015,0.502103
4,SCI10,0.238532,0.376883,0.723556,0.160046,0.699223,0.566784,0.641015,0.502103
5,SCI11,0.752525,0.547139,0.395079,0.541126,0.699223,0.566784,0.641015,0.502103
6,SCI12,0.713636,0.520401,0.588002,0.663004,0.699223,0.566784,0.641015,0.502103
7,SCI13,0.677966,0.519048,0.728538,0.326821,0.699223,0.566784,0.641015,0.502103
8,SCI14,0.755435,0.568763,0.654636,0.673905,0.699223,0.566784,0.641015,0.502103
9,SCI15,0.9,0.914745,0.669721,0.66588,0.699223,0.566784,0.641015,0.502103


In [29]:
# Show the first element of the XGBoost result (with active objects).
print('XGBoost')
xgb_active[0]

XGBoost


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.916667,0.865614,0.442797,0.450728,0.698801,0.549832,0.627834,0.503456
1,SCI03,0.765625,0.508889,0.555418,0.526641,0.698801,0.549832,0.627834,0.503456
2,SCI06,0.660194,0.564613,0.45,0.713411,0.698801,0.549832,0.627834,0.503456
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.698801,0.549832,0.627834,0.503456
4,SCI10,0.490826,0.310046,0.767473,0.235652,0.698801,0.549832,0.627834,0.503456
5,SCI11,0.742424,0.400086,0.387143,0.677617,0.698801,0.549832,0.627834,0.503456
6,SCI12,0.690909,0.490958,0.562492,0.639526,0.698801,0.549832,0.627834,0.503456
7,SCI13,0.644068,0.348294,0.701379,0.426388,0.698801,0.549832,0.627834,0.503456
8,SCI14,0.722826,0.746816,0.617838,0.496206,0.698801,0.549832,0.627834,0.503456
9,SCI15,0.864706,0.467588,0.73941,0.596929,0.698801,0.549832,0.627834,0.503456


## Without Active Objects

In [30]:
# Assemble the model inputs for the run without active-object features.

# Target: the 'adl' label of each row, integer-encoded with a fresh encoder
# (this rebinds the label_encoder name).
label_encoder = LabelEncoder()
y_inactive = df_no_active_scaled['adl']
y_encoded_inactive = label_encoder.fit_transform(y_inactive)

# Features: every column except the label and the video identifier.
X_inactive = df_no_active_scaled.drop(['adl', 'video'], axis=1)

# Grouping key: the first five characters of the video identifier.
groups_inactive = df_no_active_scaled['video'].str.slice(stop=5)

In [32]:
# Run the same evaluation for each classifier on the feature set without
# active-object columns; the shared positional inputs are bundled once.
inactive_inputs = (df_no_active_scaled, X_inactive, y_encoded_inactive, groups_inactive)

rf = ev.logocv(*inactive_inputs, rf_clf_no_active)
log = ev.logocv(*inactive_inputs, log_clf_no_active)
xgb_mod = ev.logocv(*inactive_inputs, xgb_clf_no_active)

2023-12-03 01:52:24,040 - root - INFO - LOGOCV complete for RandomForestClassifier
2023-12-03 01:52:25,143 - root - INFO - LOGOCV complete for LogisticRegression
2023-12-03 01:52:33,382 - root - INFO - LOGOCV complete for XGBClassifier


In [33]:
# Show the first element of the Random Forest result (without active objects);
# it renders as a table of metrics per left-out group plus the means.
print('Random Forest Classifier')
rf[0]

Random Forest Classifier


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.864583,0.856504,0.382456,0.407556,0.633291,0.573274,0.549723,0.479731
1,SCI03,0.765625,0.508889,0.555418,0.526641,0.633291,0.573274,0.549723,0.479731
2,SCI06,0.601942,0.464225,0.264286,0.627943,0.633291,0.573274,0.549723,0.479731
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.633291,0.573274,0.549723,0.479731
4,SCI10,0.165138,0.466667,0.630589,0.14519,0.633291,0.573274,0.549723,0.479731
5,SCI11,0.691919,0.693596,0.336159,0.349876,0.633291,0.573274,0.549723,0.479731
6,SCI12,0.736364,0.453458,0.509632,0.613809,0.633291,0.573274,0.549723,0.479731
7,SCI13,0.581921,0.440215,0.540209,0.578038,0.633291,0.573274,0.549723,0.479731
8,SCI14,0.5,0.458666,0.444733,0.481266,0.633291,0.573274,0.549723,0.479731
9,SCI15,0.882353,0.548417,0.68001,0.705731,0.633291,0.573274,0.549723,0.479731


In [34]:
# Show the first element of the Logistic Regression result (without active objects).
print('Logistic Regression')
log[0]

Logistic Regression


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.9375,0.848447,0.46385,0.455983,0.631103,0.523749,0.578691,0.4319
1,SCI03,0.8125,0.722595,0.577709,0.344381,0.631103,0.523749,0.578691,0.4319
2,SCI06,0.495146,0.538902,0.225108,0.518052,0.631103,0.523749,0.578691,0.4319
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.631103,0.523749,0.578691,0.4319
4,SCI10,0.178899,0.254286,0.698052,0.261398,0.631103,0.523749,0.578691,0.4319
5,SCI11,0.646465,0.485824,0.325873,0.472814,0.631103,0.523749,0.578691,0.4319
6,SCI12,0.713636,0.408201,0.486982,0.719495,0.631103,0.523749,0.578691,0.4319
7,SCI13,0.553672,0.5065,0.648754,0.241932,0.631103,0.523749,0.578691,0.4319
8,SCI14,0.467391,0.349302,0.454234,0.44885,0.631103,0.523749,0.578691,0.4319
9,SCI15,0.888235,0.540502,0.703861,0.7203,0.631103,0.523749,0.578691,0.4319


In [35]:
# Show the first element of the XGBoost result (without active objects).
print('XGBoost')
xgb_mod[0]

XGBoost


Unnamed: 0,group_left_out,accuracy,precision,recall,f1,mean_accuracy,mean_precision,mean_recall,mean_f1
0,SCI02,0.885417,0.863214,0.403509,0.425655,0.646736,0.470331,0.598909,0.449327
1,SCI03,0.75,0.505556,0.543653,0.520369,0.646736,0.470331,0.598909,0.449327
2,SCI06,0.582524,0.455995,0.238312,0.613988,0.646736,0.470331,0.598909,0.449327
3,SCI08,0.571429,0.666667,0.666667,0.333333,0.646736,0.470331,0.598909,0.449327
4,SCI10,0.380734,0.309012,0.737598,0.20543,0.646736,0.470331,0.598909,0.449327
5,SCI11,0.722222,0.479021,0.36917,0.531425,0.646736,0.470331,0.598909,0.449327
6,SCI12,0.681818,0.464386,0.516404,0.611609,0.646736,0.470331,0.598909,0.449327
7,SCI13,0.610169,0.327358,0.684468,0.412956,0.646736,0.470331,0.598909,0.449327
8,SCI14,0.538043,0.499704,0.535526,0.42481,0.646736,0.470331,0.598909,0.449327
9,SCI15,0.858824,0.455905,0.720552,0.578655,0.646736,0.470331,0.598909,0.449327
