In [1]:
import pickle
from collections import Counter

import pandas as pd
import polars as pl

# Shared random seed: passed as random_state to the train/test split and to the random forests below.
SEED = 0

In [2]:
# Load the processed detections; the context manager closes the file handle
# once unpickling finishes (the bare open() call left it open).
# NOTE: pickle.load can execute arbitrary code -- only load files from a trusted source.
with open('../data/all_data_processed.pkl', 'rb') as f:
    data = pickle.load(f)

### Let's test if ADLs can be predicted using single images first...

In [3]:
# Build one row per entry of `data`.
# Each key is split on its first underscore: the part before it is stored as the
# ADL label, the remainder as the image name.
records = []
for key, dets in data.items():
    label, image = key.split('_', 1)
    detic = dets['detic_data']
    records.append({
        'classes': detic['classes'],
        'active': detic['active'],
        'adl': label,
        'image': image,
    })

# Construct the frame once instead of appending row by row.
df = pd.DataFrame(records, columns=['classes', 'active', 'adl', 'image'])

# Count-encode the detected classes: one column per class holding the number of
# times that class appears in the row's `classes` list.
class_counts = [Counter(classes) for classes in df['classes']]

# Every class seen in any row, sorted so the column order is deterministic.
all_classes = sorted(set().union(*class_counts))

# Counter returns 0 for classes a row does not contain.
counts_df = pd.DataFrame(
    [[counter[c] for c in all_classes] for counter in class_counts],
    index=df.index,
    columns=all_classes,
    dtype='int64',
)

df = pd.concat([df, counts_df], axis=1)

df.head()

Unnamed: 0,classes,active,adl,image,0,1,2,3,4,5,...,19,20,21,22,23,24,25,26,27,28
0,"[17, 15, 11, 11, 25, 5, 17, 21, 21, 24, 25, 26...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame49,0,0,0,0,0,1,...,0,0,3,0,0,3,2,1,0,0
1,"[17, 11, 11, 12, 26, 12, 10, 26, 9, 15, 26, 27...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame98,0,0,0,0,0,0,...,0,0,0,0,0,0,3,3,1,0
2,"[17, 12, 11, 11, 12, 16, 26, 10, 12, 9, 13, 26...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame196,0,0,0,0,0,0,...,0,0,0,0,0,1,0,3,0,0
3,"[17, 11, 12, 12, 26, 15, 11, 10, 26, 10, 25, 9...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame147,0,0,0,0,0,1,...,0,0,2,0,0,0,1,3,0,0
4,"[17, 11, 10, 11, 12, 26, 26, 12, 26, 25, 11, 7...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame294,0,0,0,0,0,0,...,0,0,1,0,0,0,1,4,1,0


### Naive classification without active objects

In [4]:
from sklearn.model_selection import train_test_split

# Target is the ADL label; features are everything except the raw detection
# lists and the label itself. 'image' stays in X as an identifier and is
# dropped right before fitting/evaluating.
y = df['adl']
X = df.drop(columns=['classes', 'active', 'adl'])

# 80/20 hold-out split, stratified on the label and seeded for reproducibility.
# NOTE(review): the split is random per image, so frames sharing the same
# recording prefix may end up in both train and test -- confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=SEED,
)

In [5]:
from sklearn.ensemble import RandomForestClassifier

# Baseline model: a seeded 100-tree random forest fitted on the training
# features with the 'image' identifier column removed.
clf = RandomForestClassifier(random_state=SEED, n_estimators=100)
clf.fit(X_train.drop('image', axis=1), y_train)

In [6]:
from egoviz.models.evaluation import evaluate_model, evaluate_k_fold

# Score the fitted classifier on the held-out split (again without the 'image'
# identifier column), then print the returned report followed by `cm`.
report, cm, preds_df = evaluate_model(clf, X_test.drop('image', axis=1), y_test)
print(report, cm, sep='\n')

                      precision    recall  f1-score   support

  communication-mgmt       0.63      0.70      0.67        27
 functional-mobility       0.52      0.33      0.41        33
grooming-health-mgmt       0.76      0.61      0.68        31
     home-management       0.56      0.55      0.55        42
       leisure-other       0.73      0.62      0.67        13
   meal-prep-cleanup       0.64      0.89      0.74        53
        self-feeding       0.78      0.71      0.75        35

            accuracy                           0.65       234
           macro avg       0.66      0.63      0.64       234
        weighted avg       0.65      0.65      0.64       234

[[19  2  0  2  1  0  3]
 [ 3 11  2  6  0 10  1]
 [ 2  3 19  5  1  1  0]
 [ 4  3  0 23  1 10  1]
 [ 0  2  0  3  8  0  0]
 [ 0  0  3  1  0 47  2]
 [ 2  0  1  1  0  6 25]]


In [7]:
# 5-fold cross-validation of a fresh, identically configured random forest.
# NOTE(review): X still contains the 'image' column here, unlike the fit/evaluate
# cells that drop it; presumably evaluate_k_fold handles that column -- verify.
clf2 = RandomForestClassifier(random_state=SEED, n_estimators=100)
evaluate_k_fold(clf2, X, y, k=5)

f1_macro: 0.6881551428971046 +/- 0.03305029163875453


Unnamed: 0,fit_time,score_time,test_f1_macro,test_precision_macro,test_recall_macro,model,mean_f1_macro,std_f1_macro
0,0.292151,0.152693,0.731395,0.752493,0.718922,RandomForestClassifier,0.688155,0.036951
1,0.310135,0.026638,0.666953,0.68219,0.6794,RandomForestClassifier,0.688155,0.036951
2,0.284529,0.024984,0.700584,0.715248,0.69399,RandomForestClassifier,0.688155,0.036951
3,0.274262,0.024838,0.636325,0.648722,0.631454,RandomForestClassifier,0.688155,0.036951
4,0.294899,0.022966,0.705519,0.71916,0.699249,RandomForestClassifier,0.688155,0.036951
