In [58]:
import numpy as np
import scipy as sp
import os 
import pickle
import sys
import feature_extraction as fex
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

In [59]:
def create_directory(path):
    """Create the directory ``path`` (with any missing parents) if it is absent.

    A message is printed when the existence check reported the directory as
    missing and it was therefore (re)created.

    Parameters
    ----------
    path : str
        Directory to create.
    """
    if not os.path.exists(path):
        # exist_ok=True closes the gap between the existence check above and
        # the creation: if the directory appears in between, makedirs no
        # longer raises FileExistsError.
        os.makedirs(path, exist_ok=True)
        print("Path created: ", path)

In [60]:
# Pick the data directory: the hard-coded development location when the
# working directory contains "C:", otherwise the relative 'data' folder.
if "C:" in os.getcwd():
    path = 'C:/ASM/DevData/eating_steven/data'
else:
    path = 'data'

# NOTE: pickle.load can execute arbitrary code while deserialising; only
# load this file if it comes from a trusted source.
data_file = path+'/data_steven_lab.pkl'
with open(data_file, 'rb') as fh:
    data = pickle.load(fh)

# Features are indexed as features[subject][session] by the cells below.
features = fex.get_features_steven_lab()

Features file exists. Reading from file...


In [61]:
def get_annots(t, a, window_len, sampling_rate=16):
    """Assign a bite/sip label to every feature window from an annotation table.

    Parameters
    ----------
    t : np.ndarray
        1-D array of window start times. Each value is multiplied by
        ``sampling_rate`` and truncated to get the window's start sample
        index (times are presumably in seconds -- confirm against the data).
    a : np.ndarray
        2-D annotation table, one row per event. Columns read here:
        0 = event start time, 1 = event duration (used for sips only),
        2 = event type (1 = bite, any other value is handled as a sip),
        3 = hand code (rows equal to 2 are dropped),
        last = flag (0 gives a positive label, non-zero a negated label).
    window_len : int
        Window length in samples.
    sampling_rate : int, optional
        Samples per time unit used to turn times into sample indices.
        Defaults to 16, the value that used to be hard-coded.

    Returns
    -------
    np.ndarray
        Float array with ``len(t)`` entries: 0 = no event, 1 = bite, 2 = sip,
        -1 / -2 = bite / sip whose flag column is non-zero. When several
        annotations match the same window, the last one in ``a`` wins.
    """
    t = np.asarray(t)
    a = np.asarray(a)
    labels = np.zeros((len(t),))

    if a.size == 0:
        # No annotations at all. This also covers a 1-D empty array, which
        # could not be column-indexed below.
        return labels

    a = a[a[:, 3] != 2, :]  # right hand only

    # Sample indices of each window's start, first quarter, middle and
    # third quarter.
    siw = (t * sampling_rate).astype(int)
    b1w = siw + window_len // 4
    miw = siw + 2 * window_len // 4
    b2w = siw + 3 * window_len // 4

    for row in a:
        si = int(row[0] * sampling_rate)
        if row[2] == 1:  # bite
            # A bite is a single instant: it must fall in the window's
            # middle half (between the first and third quarter marks).
            cond = (b1w <= si) & (si <= b2w)
            label = 1
        else:  # sip
            # A sip is an interval: the window's midpoint must lie inside it.
            ei = int((row[0] + row[1]) * sampling_rate)
            cond = (miw >= si) & (miw <= ei)
            label = 2

        # A non-zero flag in the last column stores the label negated.
        labels[cond] = label if row[-1] == 0 else -label

    return labels
    

In [62]:
def get_annots_lab(window_len=5*16):
    """Build window labels for every subject and session of the lab data.

    Reads the module-level ``features`` and ``data`` structures, both indexed
    as ``[subject][session]``. For each session, column 0 of the feature
    matrix and ``data[subj][sess]["annots"]`` are passed to ``get_annots``.
    Progress is printed as ``subj sess | `` pairs.

    Parameters
    ----------
    window_len : int, optional
        Window length in samples forwarded to ``get_annots``. Defaults to
        ``5*16``, the value that used to be hard-coded.

    Returns
    -------
    labels : list of list of np.ndarray
        ``labels[subj][sess]`` is the label array of that session.
    all_labels : np.ndarray
        Every session's labels concatenated in iteration order; an empty
        1-D array when there are no sessions.
    """
    labels = []
    flat_labels = []
    for subj in range(len(features)):
        subj_labels = []
        for sess in range(len(features[subj])):
            print(subj, sess, end=" | ")
            # Not named `fex`, so the imported feature_extraction alias is
            # not shadowed inside this function.
            sess_features = features[subj][sess]
            annots = data[subj][sess]["annots"]
            l = get_annots(sess_features[:, 0], annots, window_len)
            subj_labels.append(l)
            flat_labels.append(l)

        labels.append(subj_labels)

    # Concatenate once at the end instead of re-copying the growing array on
    # every session. The leading empty array keeps the no-session result an
    # empty float array.
    all_labels = np.concatenate([np.empty((0,))] + flat_labels)
    return labels, all_labels

In [63]:
# labels[subj][sess] holds the per-session label arrays; all_labels is the
# same data flattened into a single array.
labels, all_labels = get_annots_lab()
n_subjects = len(labels)
print(n_subjects)

0 0 | 0 1 | 1 0 | 2 0 | 2 1 | 3 0 | 3 1 | 4 0 | 4 1 | 5 0 | 5 1 | 6 0 | 6 1 | 7


In [64]:
# Class balance over all windows: one line per label value from -2 to 2
# (negative values are the flagged variants produced by get_annots).
print(all_labels.shape)
for label_value in (-2, -1, 0, 1, 2):
    n_windows = np.sum(all_labels==label_value)
    print(label_value, ":", n_windows)

(2283679,)
-2 : 611
-1 : 370
0 : 2252500
1 : 26796
2 : 3402


In [65]:
def get_lopo_train_features_lables(exclude_subj, features, labels, convert_label=None, exclude_ambigious=True):
    """Assemble the leave-one-subject-out training set.

    Parameters
    ----------
    exclude_subj : int
        Index of the held-out subject; all of its sessions are skipped.
    features : sequence
        ``features[subj][sess]`` is a 2-D array. Column 0 is dropped and the
        remaining columns are used as the feature vector.
    labels : sequence
        ``labels[subj][sess]`` is the label array matching the rows of
        ``features[subj][sess]``. It is copied, never modified.
    convert_label : sequence of two values, optional
        ``(old, new)``: every label equal to ``old`` is replaced by ``new``.
        ``None`` or an empty sequence disables the conversion.
    exclude_ambigious : bool, optional
        If True, rows with a negative label are removed. If False they are
        kept and their label is made positive.

    Returns
    -------
    train_x, train_y : np.ndarray
        Stacked features and labels of all remaining sessions. Two empty
        lists are returned when no session was collected at all.
    """
    feature_chunks, label_chunks = [], []
    for subj in range(len(features)):
        if subj == exclude_subj:
            continue
        for sess in range(len(features[subj])):
            f = features[subj][sess][:, 1:]
            # Work on a copy so the caller's label arrays stay untouched.
            l = np.copy(labels[subj][sess])

            if exclude_ambigious:
                keep = (l >= 0)
                f = f[keep]
                l = l[keep]
            else:
                negative = (l < 0)
                l[negative] = -1 * l[negative]

            if convert_label is not None and len(convert_label) > 0:
                l[l == convert_label[0]] = convert_label[1]

            feature_chunks.append(f)
            label_chunks.append(l)

    if not feature_chunks:
        # Same result as before when nothing was collected.
        return [], []

    # One concatenation at the end avoids re-copying the growing training
    # set after every session.
    return np.concatenate(feature_chunks), np.concatenate(label_chunks)
    

In [None]:
# Leave-one-subject-out evaluation: for each subject, train a random forest
# on all other subjects, predict that subject's sessions, and pickle the
# per-session results and the fitted model under res_path.
res_path = path + "/rf_results_lopo_binary"
create_directory(res_path)
convert_label = [2, 1]      # remap label 2 to label 1 before training
exclude_ambigious = True    # drop windows with negative labels from training

for subj, subj_sessions in enumerate(features):
    train_x, train_y = get_lopo_train_features_lables(
        subj,
        features,
        labels,
        convert_label=convert_label,
        exclude_ambigious=exclude_ambigious,
    )
    # With convert_label=[2, 1] label 2 has already been remapped to 1, so
    # the second count printed here is 0.
    n_label_1 = np.sum(train_y==1)
    n_label_2 = np.sum(train_y==2)
    print("Subj:", subj, ", ", "shapes:", train_x.shape, train_y.shape, "Bite, sip:", n_label_1, n_label_2)

    clf = RandomForestClassifier(n_estimators=100, random_state=0, n_jobs=-1)
    clf.fit(train_x, train_y)
    print("Training done!")

    for sess, sess_features in enumerate(subj_sessions):
        # Column 0 is dropped, exactly as for the training features.
        test_x = sess_features[:, 1:]
        plabel = clf.predict(test_x)
        proba = clf.predict_proba(test_x)
        # Ground truth is stored unmodified (negative labels and label 2 kept).
        gtlabel = labels[subj][sess]
        res = {"plabel":plabel, "proba":proba, "gtlabel":gtlabel}

        result_file = res_path+'/result_'+str(subj)+'-'+str(sess)+'.pkl'
        with open(result_file, 'wb') as fh:
            pickle.dump(res, fh)

        print("Results  saved for session ", sess)

    model_file = res_path+'/model_'+str(subj)+'.pkl'
    with open(model_file, 'wb') as fh:
        pickle.dump(clf, fh)

    print("Model saved")