In [1]:
import numpy as np
import pickle
import os

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Target sampling rate in Hz; the read_data_* loaders pass this value to
# resample() as its `rate` argument.
sampling_rate = 16 #Hz

In [3]:
def resample(data, rate):
    """Linearly resample a time-stamped signal onto a uniform time grid.

    Column 0 of ``data`` holds the sample times; every other column is a
    channel. The grid is ``np.arange(0, data[-1, 0], 1/rate)`` with its final
    point dropped, so the result has ``len(grid) - 1`` rows. Dropping the last
    grid point is what the original implementation did and is preserved so the
    output shape does not change.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose rows are ``[time, channel_1, channel_2, ...]``.
    rate : int or float
        Number of output samples per unit of time.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, data.shape[1])``: column 0 is the grid time,
        the remaining columns are the interpolated channel values. ``n`` is 0
        when the recording is too short to produce any output sample.

    Raises
    ------
    IndexError
        If a grid time is not bracketed by two consecutive samples, e.g. the
        first timestamp is later than 0.
    """
    interval = 1 / rate
    duration = data[-1, 0]
    ts = np.arange(0, duration, interval)

    # A zero-length (or negative) duration gives an empty grid; return an empty
    # result instead of asking np.zeros for a negative number of rows.
    n_out = max(len(ts) - 1, 0)
    res = np.zeros((n_out, data.shape[1]))
    res[:, 0] = ts[:n_out]

    last_segment = len(data) - 2  # highest j for which data[j + 1] exists
    j = 0  # segment cursor; only ever moves forward because ts is increasing
    for i in range(n_out):
        t = ts[i]
        # Advance to the first segment [data[j], data[j + 1]) that contains t.
        while True:
            if j > last_segment:
                raise IndexError(
                    "resample: no pair of consecutive samples brackets t=%r" % (t,)
                )
            if data[j, 0] <= t < data[j + 1, 0]:
                break
            j += 1

        t1 = data[j, 0]
        t2 = data[j + 1, 0]
        factor = (t - t1) / (t2 - t1)
        res[i, 1:] = (1 - factor) * data[j, 1:] + factor * data[j + 1, 1:]

    return res

In [4]:
def time_to_index(data, ts):
    """Map each time in ``ts`` to the index of the sample interval containing it.

    For every time ``t`` the result is the row ``j`` of ``data`` such that
    ``data[j, 0] <= t < data[j + 1, 0]``.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array whose column 0 holds timestamps. The timestamps must be
        sorted in ascending order (the binary search relies on it).
    ts : numpy.ndarray
        Times to look up. They do not need to be sorted.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``ts`` holding the row indices
        (float dtype is kept because callers store the result in float arrays).

    Raises
    ------
    IndexError
        If any time lies before the first timestamp or at/after the last one,
        i.e. no interval contains it.
    """
    ts = np.asarray(ts, dtype=float)
    times = np.asarray(data)[:, 0]

    # side="right" returns the position after the last timestamp <= t, so
    # subtracting one gives the left edge of the containing interval.
    ix = np.searchsorted(times, ts, side="right") - 1

    out_of_range = (ix < 0) | (ix >= len(times) - 1)
    if np.any(out_of_range):
        first_bad = np.ravel(ts[out_of_range])[0]
        raise IndexError(
            "time_to_index: time %r is not inside any sample interval" % (first_bad,)
        )

    return ix.astype(float)
    

In [5]:
def process_annots_uva(annots, accel):
    """Convert UVA annotations to ``[sample index, class]`` integer rows.

    Column 0 (a time) is replaced by the index returned by ``time_to_index``
    for ``accel``. Column 1 (a raw code) is collapsed to a class: codes in
    ``[1, 400)`` become 1 and codes in ``[400, 1000)`` become 2. Any other
    code is left as it is and a warning line is printed for that row.

    The input array is not modified; a converted copy is returned.

    Parameters
    ----------
    annots : numpy.ndarray
        2-D array with rows ``[time, code, ...]``.
    accel : numpy.ndarray
        Time-stamped signal whose column 0 is used for the time lookup.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``annots``.
    """
    converted = annots.copy()  # keep the caller's array untouched
    converted[:, 0] = time_to_index(accel, converted[:, 0])

    codes = converted[:, 1]
    # Both masks are computed from the raw codes before any value is rewritten.
    low_range = (codes >= 1) & (codes < 400)
    high_range = (codes >= 400) & (codes < 1000)

    # One warning per row whose code falls outside both ranges.
    for _ in range(int(np.count_nonzero(~(low_range | high_range)))):
        print("XXXXXXX UVA annot Problem XXXXXXXXXXX")

    codes[low_range] = 1
    codes[high_range] = 2

    return converted.astype(int)
    

In [10]:
def read_data_uva_lab(path='E:\\DevData\\eating\\raw_data\\our_data\\lab_data\\', session_count=35):
    """Load, resample, annotate and save the UVA lab recordings.

    For every ``i`` in ``range(session_count)`` the comma-separated files
    ``accel_<i>`` and ``annots_<i>`` are read from ``path``. The accelerometer
    trace is resampled to the notebook-level ``sampling_rate`` and the
    annotations are converted with ``process_annots_uva``.

    Parameters
    ----------
    path : str, optional
        Directory containing the ``accel_*`` and ``annots_*`` files. Defaults
        to the location the notebook was originally written against.
    session_count : int, optional
        Number of file pairs to read, starting at index 0.

    Returns
    -------
    list
        One entry per file pair; each entry is a single-session subject of the
        form ``[[accel, annots]]``.

    Side effects
    ------------
    Prints a summary per session and stores the result with
    ``dm.save_data(data, "uva_lab_data", "data")``.
    NOTE(review): ``dm`` is not imported in any visible cell; it must already
    exist in the kernel namespace - confirm and add the import.
    """
    print("Reading data UVA lab")
    data = []
    for i in range(session_count):
        accel = np.genfromtxt(os.path.join(path, "accel_"+str(i)), delimiter=',')
        print("\nSession ", str(i), " Before process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

        accel = resample(accel, sampling_rate)
        print("Session ", str(i), " After process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

        annots = np.genfromtxt(os.path.join(path, "annots_"+str(i)), delimiter=',')
        print("Session ", str(i), " Annots >> Duration: ", str(annots[0, 0]), " - ", str(annots[-1, 0]), ", Count: "+str(len(annots)) )
        annots = process_annots_uva(annots, accel)

        # Each file pair is stored as its own subject with exactly one session.
        dsess = [accel, annots]
        dsubject = [dsess]
        data.append(dsubject)

    dm.save_data(data, "uva_lab_data", "data")
    return data
    

In [11]:
# Run the UVA lab loader (it also saves the dataset through dm.save_data) and
# report how many entries it returned.
data = read_data_uva_lab()
print("UVA lab Subject count: ", len(data))

Reading data UVA lab

Session  0  Before process >> Duration:  0.0  -  2519.9878368 , Count: 151894
Session  0  After process >> Duration:  0.0  -  2519.875 , Count: 40319
Session  0  Annots >> Duration:  103.2  -  2071.0 , Count: 102

Session  1  Before process >> Duration:  0.0  -  2107.51441538 , Count: 127874
Session  1  After process >> Duration:  0.0  -  2107.4375 , Count: 33720
Session  1  Annots >> Duration:  131.7  -  2053.6 , Count: 115

Session  2  Before process >> Duration:  0.0  -  2190.18375327 , Count: 136694
Session  2  After process >> Duration:  0.0  -  2190.0625 , Count: 35042
Session  2  Annots >> Duration:  135.7  -  2056.4 , Count: 81

Session  3  Before process >> Duration:  0.0  -  2111.33027864 , Count: 129059
Session  3  After process >> Duration:  0.0  -  2111.25 , Count: 33781
Session  3  Annots >> Duration:  57.7  -  1836.4 , Count: 78

Session  4  Before process >> Duration:  0.0  -  2002.71029859 , Count: 122886
Session  4  After process >> Duration:  0.

In [14]:
def read_data_uva_free(path='E:\\DevData\\eating\\raw_data\\our_data\\free_data\\', session_count=16):
    """Load, resample and save the UVA free-living recordings.

    For every ``i`` in ``range(session_count)`` the comma-separated file
    ``accel_<i>`` is read from ``path`` and resampled to the notebook-level
    ``sampling_rate``. No annotation file is read for this dataset; each
    session carries an empty annotation list.

    Parameters
    ----------
    path : str, optional
        Directory containing the ``accel_*`` files. Defaults to the location
        the notebook was originally written against.
    session_count : int, optional
        Number of files to read, starting at index 0.

    Returns
    -------
    list
        One entry per file; each entry is a single-session subject of the form
        ``[[accel, []]]``.

    Side effects
    ------------
    Prints a summary per session and stores the result with
    ``dm.save_data(data, "uva_free_data", "data")``.
    NOTE(review): ``dm`` is not imported in any visible cell; it must already
    exist in the kernel namespace - confirm and add the import.
    """
    print("Reading data UVA free")
    data = []
    for i in range(session_count):
        accel = np.genfromtxt(os.path.join(path, "accel_"+str(i)), delimiter=',')
        print("Session ", str(i), " Before process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

        accel = resample(accel, sampling_rate)
        print("Session ", str(i), " After process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

        annots = []  # this dataset is stored without annotations

        # Each file is stored as its own subject with exactly one session.
        dsess = [accel, annots]
        dsubject = [dsess]
        data.append(dsubject)

    dm.save_data(data, "uva_free_data", "data")

    return data

In [15]:
# Run the UVA free-living loader (it also saves the dataset through
# dm.save_data) and report how many entries it returned.
data = read_data_uva_free()
print("UVA free Subject count: ",len(data))

Reading data UVA free
Session  0  Before process >> Duration:  0.0  -  2918.12415529 , Count: 177491
Session  0  After process >> Duration:  0.0  -  2918.0 , Count: 46689
[[ 0.         -0.69415563 -2.5994933   9.349558  ]
 [ 0.0625     -0.6747161  -2.427151    9.31213123]
 [ 0.125      -0.7091976  -2.4654493   9.29484384]
 [ 0.1875     -0.76591364 -2.56835014  9.27937923]
 [ 0.25       -0.64242532 -2.56205548  9.37642232]
 [ 0.3125     -0.58915509 -2.48449187  9.33987746]
 [ 0.375      -0.66287897 -2.54587688  9.29402614]
 [ 0.4375     -0.67638651 -2.5037475   9.330409  ]
 [ 0.5        -0.62948066 -2.48478516  9.3331294 ]
 [ 0.5625     -0.63924669 -2.51136279  9.30676182]]
Session  1  Before process >> Duration:  0.0  -  2441.32360804 , Count: 148916
Session  1  After process >> Duration:  0.0  -  2441.25 , Count: 39061
[[  0.           0.665432     6.218677     7.788905  ]
 [  0.0625       1.49660921   6.62591908   7.8368175 ]
 [  0.125        1.92958375   6.20138705   7.83646758]
 [ 

In [16]:
def process_bite_label_steven(label):
    """Turn a letter-coded annotation label into its integer code.

    Every known letter is substituted by the digit string listed below, the
    remaining characters are kept as they are, and the resulting text is
    parsed with ``int`` (so an unmapped, non-numeric character raises
    ``ValueError``).
    """
    substitutions = str.maketrans({
        "P": "1", "D": "2", "I": "3", "Q": "4",
        "N": "5", "C": "6", "S": "7", "M": "8",
        "R": "1", "L": "2", "X": "-1",
    })
    # None of the substituted texts contains a mapped letter, so a single
    # translation pass is equivalent to replacing the letters one after another.
    numeric_text = label.translate(substitutions)
    return int(numeric_text)

def process_bite_annots_steven(annots, accel):
    """Extract bite/drink events from annotation lines as ``[sample index, class]``.

    Each line is ``<t1>,<t2>,<label>``. The label is converted with
    ``process_bite_label_steven``; the codes handled here are 21 and 22
    (hand markers) and 3 and 4 (bite and drink). Lines are walked in order
    while tracking which hand is active:

    * a hand marker directly after a bite/drink selects hand 1 (code 21) or
      hand 2 (code 22);
    * a hand marker that does not directly follow a bite/drink sets hand 3
      (presumably "both hands" - TODO confirm);
    * a bite/drink is kept only while the hand is 1 or 3 (the right-hand
      filter), with class 1 for a bite and 2 for a drink.

    Parameters
    ----------
    annots : list of str
        Raw annotation lines.
    accel : numpy.ndarray
        Time-stamped signal used by ``time_to_index`` to turn event times into
        sample indices.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_events, 2)``; shape ``(0, 2)`` when no
        event is kept.
    """
    a = np.zeros((len(annots), 2))
    for i, line in enumerate(annots):
        s = line.split(",")
        t1 = float(s[0].rstrip())
        t2 = float(s[1].rstrip())
        label = process_bite_label_steven(s[2].rstrip())
        # NOTE(review): this is the centre of the event only if the second
        # field is a duration; if it is an end time the centre would be
        # (t1 + t2) / 2. Left unchanged - confirm against the file format.
        a[i, 0] = t1+t2/2
        a[i, 1] = label

    res = []
    hand = 0               # 0 = no hand marker seen yet
    last_was_bite = False  # True when the previous handled line was a bite/drink
    for i in range(len(a)):
        code = a[i, 1]
        if code == 21 or code == 22:
            if last_was_bite:
                hand = 1 if code == 21 else 2
            else:
                if hand == 1 and code == 21 or hand == 2 and code == 22:
                    print( "************************ Similar hand found twice ****************")

                hand = 3

            last_was_bite = False

        elif code == 3 or code == 4:
            if hand == 0:
                print("********** Hand is not found : " , annots[i] , ", line no: ", (i + 1))
                # Skipped entirely: last_was_bite is deliberately left as is.
                continue

            if hand == 1 or hand == 3:  # add right hand data only
                res.append([a[i, 0], 1 if code == 3 else 2])

            if last_was_bite:
                print("************************ Bite repeated **************** ", i)
            last_was_bite = True

    # np.array([]) is 1-D, so column indexing below would fail on an empty list.
    if not res:
        return np.zeros((0, 2), dtype=int)

    res = np.array(res)
    res[:, 0] = time_to_index(accel, res[:, 0])
    return res.astype(int)

In [23]:
def read_data_steven_lab(path='E:\\DevData\\eating\\raw_data\\steventech_data\\lab_public'):
    """Load, resample, annotate and save the Steven lab recordings.

    Subjects 0-6 with sessions 0-1 are visited, except (subject 0, session 0)
    and (subject 1, session 1), which are skipped. For each remaining session
    the right-watch CSV is read, its first four columns are kept, the time
    column is divided by 1e9, and the trace is resampled to the notebook-level
    ``sampling_rate``. The lines of ``annot_events.csv`` are converted with
    ``process_bite_annots_steven``.

    Parameters
    ----------
    path : str, optional
        Root directory containing ``0<subject>/000<session>/`` folders.
        Defaults to the location the notebook was originally written against.

    Returns
    -------
    list
        ``data[subject]`` is a list of ``[accel, annots]`` sessions.

    Side effects
    ------------
    Prints a summary per session and stores the result with
    ``dm.save_data(data, "steven_lab_data", "data")``.
    NOTE(review): ``dm`` is not imported in any visible cell; it must already
    exist in the kernel namespace - confirm and add the import.
    """
    data = []
    for subject in range(7):
        dsubject = []
        for sess in range(2):
            if (subject==0 and sess==0) or (subject==1 and sess==1):
                continue

            sess_dir = os.path.join(path, "0" + str(subject), "000" + str(sess))
            filePathAccel = os.path.join(sess_dir, "watch_right_000" + str(sess) + ".csv")
            filePathAnnots = os.path.join(sess_dir, "annot_events.csv")

            accel = np.genfromtxt(filePathAccel, delimiter=',')
            accel = accel[:, :4]
            accel[:, 0] = accel[:, 0]/1e9
            print("Subject ", str(subject), ", Session ", str(sess), " Before process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

            accel = resample(accel, sampling_rate)
            print("Subject ", str(subject), ", Session ", str(sess), " After process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )
            print(accel[:5, :])

            # Context manager so the annotation file is always closed.
            with open(filePathAnnots) as annot_file:
                annots = annot_file.readlines()
            annots = process_bite_annots_steven(annots, accel)
            print("Subject ", str(subject), ", Session ", str(sess), " Annots >> Duration: ", str(annots[0, 0]), " - ", str(annots[-1, 0]), ", Count: "+str(len(annots)) )
            print(annots[:5, :])

            dsess = [accel, annots]
            dsubject.append(dsess)

        data.append(dsubject)

    dm.save_data(data, "steven_lab_data", "data")

    return data
    
    

In [24]:
# Run the Steven lab loader (it also saves the dataset through dm.save_data)
# and report how many subject entries it returned.
data = read_data_steven_lab()
print("Steven lab Subject count: ",len(data))

Subject  0 , Session  1  Before process >> Duration:  0.0  -  23392.876032 , Count: 384325
Subject  0 , Session  1  After process >> Duration:  0.0  -  23392.8125 , Count: 374286
[[  0.           1.5321394   -4.7046065   18.491467  ]
 [  0.0625      -1.55545502  -0.84243863  13.07159388]
 [  0.125        0.49037331  -2.27770684  11.61631492]
 [  0.1875       1.21526665  -2.76804569  10.19072067]
 [  0.25         1.42990789  -2.57645989   9.49637374]]
************************ Bite repeated ****************  2733
************************ Bite repeated ****************  2774
************************ Bite repeated ****************  2777
************************ Bite repeated ****************  2789
Subject  0 , Session  1  Annots >> Duration:  7234  -  370528 , Count: 380
[[7234    2]
 [7452    1]
 [7517    1]
 [7706    1]
 [8105    1]]
Subject  1 , Session  0  Before process >> Duration:  0.0  -  21589.8566918 , Count: 311146
Subject  1 , Session  0  After process >> Duration:  0.0  -  215

In [25]:
def get_meal_code(meal):
    """Return the numeric code for a meal-type label.

    "meal", "lunch" and "dinner" give 1, "snack" gives 2, "drink" gives 3 and
    anything else gives -1.
    """
    if meal in ("meal", "lunch", "dinner"):
        return 1
    if meal == "snack":
        return 2
    if meal == "drink":
        return 3
    return -1

def process_meal_annots_steven(annots, accel, subject_code, session):
    """Convert meal annotation lines to ``[start index, end index, meal code]`` rows.

    Each line is split on commas; fields 1 and 2 are the start and end times
    and field 3 is the meal label, converted with ``get_meal_code``. Both
    times are mapped to the row ``j`` of ``accel`` with
    ``accel[j, 0] <= t < accel[j + 1, 0]``.

    Every lookup is independent, so one time that cannot be located no longer
    prevents later meals from being located.

    Parameters
    ----------
    annots : list of str
        Raw annotation lines.
    accel : numpy.ndarray
        Time-stamped signal; column 0 must be sorted in ascending order (the
        binary search relies on it).
    subject_code : int
        Subject identifier, used for the per-recording special cases below.
    session : int
        Session number, used for the per-recording special cases below.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(meal_count, 3)``. An index of 0 means the time
        could not be located (or genuinely falls in the first interval); a
        "Time indexing problem" line is printed for such rows.
    """
    accel_count = len(accel)

    # For these recordings only the first N annotation lines are used.
    line_limits = {(5, 0): 9, (6, 0): 2, (102, 1): 1, (104, 0): 7}
    meal_count = line_limits.get((subject_code, session), len(annots))

    times = accel[:, 0] if accel_count else np.zeros(0)
    last_interval = accel_count - 2  # highest j for which accel[j + 1] exists

    def interval_index(t):
        # side="right" gives the position after the last timestamp <= t.
        k = int(np.searchsorted(times, t, side="right")) - 1
        return k if 0 <= k <= last_interval else 0

    a = np.zeros((meal_count, 3))
    for i in range(meal_count):
        s = annots[i].split(",")
        t1 = float(s[1].rstrip())
        t2 = float(s[2].rstrip())
        a[i, 2] = get_meal_code(s[3].strip())

        if a[i, 2] == -1:
            print(" \n\n******************* Meal code problem *************** code is -1\n\n")

        a[i, 0] = interval_index(t1)
        a[i, 1] = interval_index(t2)

        # Subject 107, sessions 0 and 2: an end time that could not be located
        # is replaced by the last sample index.
        if a[i, 1] == 0 and subject_code == 107 and session in (0, 2):
            a[i, 1] = accel_count - 1

        if a[i, 0] == 0 or a[i, 1] == 0:
            print("***** Time indexing problem. Meal line: " , (i + 1))

    return a

In [32]:
def read_data_steven_free(path='E:\\DevData\\eating\\raw_data\\steventech_data\\ACE_FL_public'):
    """Load, resample, annotate and save the Steven free-living recordings.

    Subject codes 2, 3, 4, 5, 6, 101, 102, 103, 104, 107 and 109 are visited.
    Subject 107 has five sessions, all others two. Session folders are named
    ``000<n>``; for subject 109 the folder number is the session number plus 3.
    Subject folders are the code, prefixed with ``0`` for codes below 10.

    For each session the right-watch CSV is read, its first four columns are
    kept, the time column is divided by 1e9, and the trace is resampled to the
    notebook-level ``sampling_rate``. The lines of ``meal_events.csv`` are
    converted with ``process_meal_annots_steven``.

    Parameters
    ----------
    path : str, optional
        Root directory containing the subject folders. Defaults to the
        location the notebook was originally written against.

    Returns
    -------
    list
        ``data[k]`` is the list of ``[accel, annots]`` sessions of the k-th
        subject code.

    Side effects
    ------------
    Prints a summary per session and stores the result with
    ``dm.save_data(data, "steven_free_data", "data")``.
    NOTE(review): ``dm`` is not imported in any visible cell; it must already
    exist in the kernel namespace - confirm and add the import.
    """
    subject_codes = [2, 3, 4, 5, 6, 101, 102, 103, 104, 107, 109]

    data = []
    for subject, code in enumerate(subject_codes):
        dsubject = []
        sess_count = 5 if code == 107 else 2
        subject_dir = "0" + str(code) if code < 10 else str(code)

        for sess in range(sess_count):
            # Folder/file numbering is shifted by 3 for subject 109 only.
            sess_no = sess + 3 if code == 109 else sess
            sess_dir = os.path.join(path, subject_dir, "000" + str(sess_no))
            filePathAccel = os.path.join(sess_dir, "watch_right_000" + str(sess_no) + ".csv")
            filePathAnnots = os.path.join(sess_dir, "meal_events.csv")

            accel = np.genfromtxt(filePathAccel, delimiter=',')
            accel = accel[:, :4]
            accel[:, 0] = accel[:, 0]/1e9
            print("\n\nSubject ", str(subject), ", Session ", str(sess), " Before process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

            accel = resample(accel, sampling_rate)
            print("Subject ", str(subject), ", Session ", str(sess), " After process >> Duration: ", str(accel[0, 0]), " - ", str(accel[-1, 0]), ", Count: "+str(len(accel)) )

            # Context manager so the annotation file is always closed.
            with open(filePathAnnots) as annot_file:
                annots = annot_file.readlines()
            # The un-shifted session number is what the special cases expect.
            annots = process_meal_annots_steven(annots, accel, code, sess)
            print("Subject ", str(subject), ", Session ", str(sess), " Annots >> Duration: ", str(annots[0, 0]), " - ", str(annots[-1, 0]), ", Count: "+str(len(annots)) )
            print(annots)

            dsess = [accel, annots]
            dsubject.append(dsess)

        data.append(dsubject)

    dm.save_data(data, "steven_free_data", "data")

    return data
    

In [33]:
# Run the Steven free-living loader (it also saves the dataset through
# dm.save_data) and report how many subject entries it returned.
data = read_data_steven_free()
print("Steven Free Subject count: ",len(data))



Subject  0 , Session  0  Before process >> Duration:  0.0  -  40424.5294861 , Count: 725551
Subject  0 , Session  0  After process >> Duration:  0.0  -  40424.4375 , Count: 646792
Subject  0 , Session  0  Annots >> Duration:  12888.0  -  294585.0 , Count: 3
[[  1.28880000e+04   1.88310000e+04   1.00000000e+00]
 [  7.72310000e+04   7.81750000e+04   3.00000000e+00]
 [  2.94585000e+05   3.22326000e+05   1.00000000e+00]]


Subject  0 , Session  1  Before process >> Duration:  0.0  -  38185.6452497 , Count: 550555
Subject  0 , Session  1  After process >> Duration:  0.0  -  38185.5625 , Count: 610970
Subject  0 , Session  1  Annots >> Duration:  33449.0  -  479052.0 , Count: 5
[[  3.34490000e+04   4.43780000e+04   1.00000000e+00]
 [  1.32596000e+05   1.33317000e+05   3.00000000e+00]
 [  3.10975000e+05   3.21961000e+05   1.00000000e+00]
 [  3.99570000e+05   4.00600000e+05   3.00000000e+00]
 [  4.79052000e+05   4.79898000e+05   3.00000000e+00]]


Subject  1 , Session  0  Before process >> D

In [34]:
# Smooth every saved dataset with several smoothing factors and save each
# result under "<dataset name>_<factor>" in the "data_smooth" group.
# NOTE(review): `dm` and `myutils` are not imported in any visible cell; they
# must already exist in the kernel namespace - confirm and add the imports.

# Factors are written in thousandths; each is divided by 1000 before use.
factors = [800, 825, 850, 875, 900]
ds_names = ["uva_lab_data", "uva_free_data", "steven_lab_data", "steven_free_data"]

for dsn in ds_names:    
    for f in factors:    
        # The dataset is re-read for every factor, presumably so that each
        # pass starts from unsmoothed data - confirm whether smooth_dataset
        # modifies its input before hoisting this out of the inner loop.
        data = dm.get_data(dsn, "data")
        factor = f/1000
        print(dsn, " : ", factor)
        data = myutils.smooth_dataset(data, factor)
        dm.save_data(data, dsn+"_"+str(f), "data_smooth")


uva_lab_data  :  0.8
Smothing data with factor  0.8  ...
Subject  0 , Session  0
Subject  1 , Session  0
Subject  2 , Session  0
Subject  3 , Session  0
Subject  4 , Session  0
Subject  5 , Session  0
Subject  6 , Session  0
Subject  7 , Session  0
Subject  8 , Session  0
Subject  9 , Session  0
Subject  10 , Session  0
Subject  11 , Session  0
Subject  12 , Session  0
Subject  13 , Session  0
Subject  14 , Session  0
Subject  15 , Session  0
Subject  16 , Session  0
Subject  17 , Session  0
Subject  18 , Session  0
Subject  19 , Session  0
Subject  20 , Session  0
Subject  21 , Session  0
Subject  22 , Session  0
Subject  23 , Session  0
Subject  24 , Session  0
Subject  25 , Session  0
Subject  26 , Session  0
Subject  27 , Session  0
Subject  28 , Session  0
Subject  29 , Session  0
Subject  30 , Session  0
Subject  31 , Session  0
Subject  32 , Session  0
Subject  33 , Session  0
Subject  34 , Session  0
uva_lab_data  :  0.825
Smothing data with factor  0.825  ...
Subject  0 , Sess