In [2]:
import pickle

In [28]:
# Load the preprocessed detections. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from a trusted source.
with open('../data/all_data_processed.pkl', 'rb') as f:
    data = pickle.load(f)

### Let's test if ADLs can be predicted using single images first...

In [44]:
# Create a dataframe from `data` where each row corresponds to a single image.
# Columns: classes (list of classes), active (list of active labels),
# adl (y-label), image (remainder of the key after the label).
#
# Rows are gathered in a plain list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and calling it inside a loop
# copied the whole frame on every iteration.

import pandas as pd

records = []
for key, det in data.items():
    # keys have the form "<adl>_<image>"; split on the first underscore only
    label, image = key.split('_', 1)
    detic = det['detic_data']
    records.append({
        'classes': detic['classes'],
        'active': detic['active'],
        'adl': label,
        'image': image,
    })

# passing `columns` keeps the schema stable even if `data` is empty
df = pd.DataFrame(records, columns=['classes', 'active', 'adl', 'image'])

df.head()



Unnamed: 0,classes,active,adl,image
0,"[17, 15, 11, 11, 25, 5, 17, 21, 21, 24, 25, 26...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame49
1,"[17, 11, 11, 12, 26, 12, 10, 26, 9, 15, 26, 27...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame98
2,"[17, 12, 11, 11, 12, 16, 26, 10, 12, 9, 13, 26...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame196
3,"[17, 11, 12, 12, 26, 15, 11, 10, 26, 10, 25, 9...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame147
4,"[17, 11, 10, 11, 12, 26, 26, 12, 26, 25, 11, 7...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame294


In [45]:
# Explode `classes` into one count column per class id: each new column holds
# the number of times that class appears in the row's `classes` list
# (like one-hot encoding, but with counts instead of 0/1).

from collections import Counter

# every class id seen in any image; sorted so the column order is deterministic
all_classes = sorted({c for classes in df['classes'] for c in classes})

# count each row's classes once instead of incrementing cells one detection at a time
per_image_counts = [Counter(classes) for classes in df['classes']]

# a Counter returns 0 for classes that are absent from a row
for c in all_classes:
    df[c] = [counts[c] for counts in per_image_counts]

df.head()

Unnamed: 0,classes,active,adl,image,0,1,2,3,4,5,...,19,20,21,22,23,24,25,26,27,28
0,"[17, 15, 11, 11, 25, 5, 17, 21, 21, 24, 25, 26...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame49,0,0,0,0,0,1,...,0,0,3,0,0,3,2,1,0,0
1,"[17, 11, 11, 12, 26, 12, 10, 26, 9, 15, 26, 27...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame98,0,0,0,0,0,0,...,0,0,0,0,0,0,3,3,1,0
2,"[17, 12, 11, 11, 12, 16, 26, 10, 12, 9, 13, 26...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame196,0,0,0,0,0,0,...,0,0,0,0,0,1,0,3,0,0
3,"[17, 11, 12, 12, 26, 15, 11, 10, 26, 10, 25, 9...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame147,0,0,0,0,0,1,...,0,0,2,0,0,0,1,3,0,0
4,"[17, 11, 10, 11, 12, 26, 26, 12, 26, 25, 11, 7...","[True, False, False, False, False, False, Fals...",communication-mgmt,SCI06-7--11_frame294,0,0,0,0,0,0,...,0,0,1,0,0,0,1,4,1,0


### Naive classification without active objects

In [46]:
from sklearn.model_selection import train_test_split

# Target is the ADL label; features are everything except the raw list columns
# and the label itself (`image` stays in X at this point).
y = df['adl']
X = df.drop(columns=['classes', 'active', 'adl'])

# 80/20 split, stratified on y, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)



In [47]:
# Fit a random forest on the training features
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=42, n_estimators=100)

# `image` is not passed to the model, so drop it before fitting
train_features = X_train.drop(columns=['image'])
clf.fit(train_features, y_train)

RandomForestClassifier()

In [2]:
# Evaluate the fitted classifier with the project's evaluation helper.
# NOTE(review): the recorded run of this cell failed with
# "ImportError: cannot import name 'Protocol' from 'typing'" on Python 3.7;
# typing.Protocol was added in Python 3.8, so the kernel likely needs a newer interpreter.
from egoviz.models.evaluation import evaluate_model

# the helper receives the features without the `image` column
test_features = X_test.drop(columns=['image'])
train_features = X_train.drop(columns=['image'])

report, cm = evaluate_model(clf, test_features, y_test, train_features, y_train)


ImportError: cannot import name 'Protocol' from 'typing' (/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/typing.py)

In [50]:
# Put predictions next to the true labels for the held-out rows so individual
# images can be inspected.
# `y_pred` is computed here because no other visible cell assigns it; without
# this line the cell raises NameError on a fresh kernel.
y_pred = clf.predict(X_test.drop(columns=['image']))

test_df = X_test.copy()
test_df['pred_adl'] = y_pred
test_df['actual_adl'] = y_test  # Series assignment aligns on the index
test_df.head()

Unnamed: 0,image,0,1,2,3,4,5,6,7,8,...,21,22,23,24,25,26,27,28,pred_adl,actual_adl
816,SCI12-1--15_frame588,0,4,0,0,0,0,0,0,3,...,0,0,0,0,0,1,0,0,self-feeding,meal-prep-cleanup
96,SCI18-4--4_frame294,0,0,0,0,0,0,0,0,0,...,1,0,0,1,1,1,0,0,functional-mobility,communication-mgmt
547,SCI12-4--6_frame294,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,4,meal-prep-cleanup,home-management
826,SCI14-1--10_frame0,0,6,0,0,0,0,0,1,2,...,5,0,0,0,0,2,1,0,meal-prep-cleanup,meal-prep-cleanup
558,SCI15-1--4_frame196,0,8,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,home-management,home-management
