In [1]:
# Create the output directories next to this notebook.
# Safe to re-run: directories that already exist are left untouched, so this
# cell no longer has to be skipped after the first run.

import os

for output_dir_name in ("wrist_agg", "hip_agg", "chest_agg", "thigh_agg"):
    os.makedirs(output_dir_name, exist_ok=True)

In [2]:
import datetime
import glob
import os

import pandas as pd

In [3]:
def aggregate_files(files, output_dir, increment=80, sample_rate=80): #80 for 1 sec, 800 for 10 sec
    """Average raw accelerometer CSV files over fixed-size windows of samples.

    For each input file the data rows are grouped into consecutive windows of
    ``increment`` rows, every column is averaged per window, and an
    ``actual_datetime`` column holding the start time of each window is added.
    The result is written to ``output_dir`` under the input's base name with
    the first "RAW" replaced by "AGGREGATE" (or with "_AGGREGATE" appended to
    the stem when the name does not contain "RAW").

    Parameters
    ----------
    files : iterable of str
        Paths of the raw CSV files. Each file is read twice: once skipping
        the first 10 lines to get the data, once to read the header lines.
        NOTE(review): the code assumes the 3rd and 4th lines of the file hold
        the start time and start date after an 11-character prefix - confirm
        against the device export format.
    output_dir : str
        Directory receiving the aggregated CSV files; created if missing.
    increment : int, default 80
        Number of consecutive rows averaged into one output row. The final
        window of a file may contain fewer rows.
    sample_rate : int or float, default 80
        Samples per second of the recording, used to convert a window's first
        row number into elapsed seconds. The default reproduces the previously
        hard-coded value of 80.

    Raises
    ------
    ValueError
        If ``increment`` or ``sample_rate`` is not positive.
    """
    if increment <= 0 or sample_rate <= 0:
        raise ValueError("increment and sample_rate must be positive")

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for file in files:
        data = pd.read_csv(file, skiprows=10)
        description = pd.read_csv(file, nrows=8)

        # The first line of the file becomes the column header of
        # `description`, so row 1 / row 2 are the 3rd / 4th lines of the file.
        # [11:] strips the fixed-width label in front of the value.
        start_time = description.iloc[1, 0][11:]
        start_date = description.iloc[2, 0][11:]
        start_datetime = datetime.datetime.strptime(start_date + '/' + start_time,
                                                    '%m/%d/%Y/%H:%M:%S')

        agg = data.groupby(data.index // increment).mean()

        # Window k starts at row k * increment, i.e. k * increment / sample_rate
        # seconds after the recording started.
        elapsed_seconds = agg.index.to_numpy() * increment / sample_rate
        agg['actual_datetime'] = (pd.Timestamp(start_datetime)
                                  + pd.to_timedelta(elapsed_seconds, unit='s'))

        # basename works with the platform's separators instead of assuming
        # the path contains exactly one backslash.
        base_name = os.path.basename(file)
        if "RAW" in base_name:
            out_name = base_name.replace("RAW", "AGGREGATE", 1)
        else:
            stem, ext = os.path.splitext(base_name)
            out_name = stem + "_AGGREGATE" + ext

        agg.to_csv(os.path.join(output_dir, out_name), index=False)

In [4]:
# Directories that receive the aggregated wrist and hip files
output_wrist_dir = "wrist_agg"
output_hip_dir = "hip_agg"

#Location of raw wrist and hip files
path = "E:/Data/Monitor Data/"

# glob returns matches in a filesystem-dependent order; sorting makes the
# processing order and the listings shown below reproducible between runs.
wrist_files = sorted(glob.glob(path + "final_wrist_raw_csv/*"))
hip_files = sorted(glob.glob(path + "hip/*"))

In [5]:
# Display the raw wrist file paths matched by glob in the previous cell.
wrist_files

['E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM01WRT (2017-10-02)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM02WRT (2017-07-19)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM03WRT (2017-07-24)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM04WRT (2017-09-11)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM05WRT (2017-09-19)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM06WRT (2017-07-28)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM07WRT (2017-07-31)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM08WRT (2017-08-01)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM09WRT (2017-08-08)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM10WRT (2017-08-10)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM11WRT (2017-08-30)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM12WRT (2017-09-13)RAW.csv',
 'E:/Data/Monitor Data/final_wrist_raw_csv\\AG_AM13WRT (2017-09-18)RAW.csv',

In [6]:
# Display the raw hip file paths matched by glob.
hip_files

['E:/Data/Monitor Data/hip\\AG_AM01HIP (2017-07-14)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM01reHIP (2017-10-02)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM02HIP (2017-07-19)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM03HIP (2017-07-24)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM04reHIP (2017-09-11)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM05reHIP (2017-09-19)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM06HIP (2017-07-27)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM07HIP (2017-07-31)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM08HIP (2017-08-01)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM09HIP (2017-08-08)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM10HIP (2017-08-10)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM11HIP (2017-08-30)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM12HIP (2017-09-13)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM13HIP (2017-09-18)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM14HIP (2017-09-19)RAW.csv',
 'E:/Data/Monitor Data/hip\\AG_AM15HIP (2017-10-09)RAW.csv',
 'E:/Data/Monitor 

In [7]:
# Aggregate every wrist file with the default increment (80 rows per window)
# and write the results into output_wrist_dir.
aggregate_files(wrist_files, output_wrist_dir)

In [8]:
# Aggregate every hip file with the default increment (80 rows per window)
# and write the results into output_hip_dir.
aggregate_files(hip_files, output_hip_dir)

In [9]:
# Directories that receive the aggregated chest and thigh files
output_chest_dir = "chest_agg"
output_thigh_dir = "thigh_agg"

# Two wildcard directory levels below BioStampRC, then the sensor location
# folder, then two more wildcard levels down to accel.csv.
midpath = "BioStampRC/*/*/"

endpath = "*/*/accel.csv"

# glob returns matches in a filesystem-dependent order. Sorting keeps the
# three lists in a deterministic order, which matters because they are
# matched up by position later in the notebook.
chest_files = sorted(glob.glob(path + midpath + "medial_chest/" + endpath))
thigh_files = sorted(glob.glob(path + midpath + "anterior_thigh_*/" + endpath))
annotations = sorted(glob.glob(path + midpath + "annotations.csv"))

In [10]:
# Display the chest accel.csv paths matched by glob.
chest_files

['E:/Data/Monitor Data/BioStampRC\\AM_01re\\DO1\\medial_chest\\d5la7ycy\\2017-10-04T01-44-01-387Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_01re\\DO2\\medial_chest\\d5la7ydg\\2017-10-06T23-42-20-227Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_02\\DO1A\\medial_chest\\d5la7xya\\2017-07-25T14-42-49-390Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_02\\DO2H\\medial_chest\\d5la7ye3\\2017-07-24T20-15-14-377Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_03\\DO1\\medial_chest\\d5la7ye3\\2017-07-25T20-56-38-516Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_03\\DO2\\medial_chest\\d5la7ye3\\2017-07-27T20-03-39-175Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_04re\\DO1\\medial_chest\\d5la7ycy\\2017-09-12T19-00-06-680Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_04re\\DO2\\medial_chest\\d5la7ydg\\2017-09-17T21-13-53-301Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_05re\\DO1\\medial_chest\\d5la7ydg\\2017-09-20T00-27-48-556Z\\accel.csv',
 'E:/Da

In [11]:
# Display the thigh accel.csv paths matched by glob.
thigh_files

['E:/Data/Monitor Data/BioStampRC\\AM_01re\\DO1\\anterior_thigh_left\\d5la7xya\\2017-10-04T01-43-57-110Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_01re\\DO2\\anterior_thigh_left\\d5la7y9s\\2017-10-06T23-42-16-622Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_02\\DO1A\\anterior_thigh_left\\d5la7ydg\\2017-07-25T14-42-45-789Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_02\\DO2H\\anterior_thigh_left\\d5la7ycy\\2017-07-24T20-15-10-645Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_03\\DO1\\anterior_thigh_left\\d5la7ycy\\2017-07-25T20-56-34-816Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_03\\DO2\\anterior_thigh_left\\d5la7xqf\\2017-07-27T20-03-35-756Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_04re\\DO1\\anterior_thigh_left\\d5la7xya\\2017-09-12T19-00-02-562Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_04re\\DO2\\anterior_thigh_left\\d5la7ycy\\2017-09-17T21-13-49-659Z\\accel.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_05re\\DO1\\anterior_thig

In [12]:
# Display the annotations.csv paths matched by glob.
annotations

['E:/Data/Monitor Data/BioStampRC\\AM_01re\\DO1\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_01re\\DO2\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_02\\DO1A\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_02\\DO2H\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_03\\DO1\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_03\\DO2\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_04re\\DO1\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_04re\\DO2\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_05re\\DO1\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_05re\\DO2\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_06\\DO1\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_06\\DO2\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_07\\DO1\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_07\\DO2\\annotations.csv',
 'E:/Data/Monitor Data/BioStampRC\\AM_08\\DO1\\annotations.csv',
 'E:/Data/M

In [13]:
# Print how many chest files, thigh files and annotation files were matched,
# one count per line, in that order.
for matched_paths in (chest_files, thigh_files, annotations):
    print(len(matched_paths))

52
52
52


In [14]:
# Load the first chest accelerometer file to inspect its timestamps.
data = pd.read_csv(chest_files[0])

In [15]:
# Convert each row's millisecond timestamp to a whole-second UTC datetime.
# pd.to_datetime is vectorized and avoids datetime.utcfromtimestamp, which is
# deprecated since Python 3.12.
pd.to_datetime(data["Timestamp (ms)"] // 1000, unit="s")

0        2017-10-04 01:44:01
1        2017-10-04 01:44:01
2        2017-10-04 01:44:01
3        2017-10-04 01:44:01
4        2017-10-04 01:44:01
                 ...        
237315   2017-10-04 03:49:04
237316   2017-10-04 03:49:04
237317   2017-10-04 03:49:04
237318   2017-10-04 03:49:04
237319   2017-10-04 03:49:04
Name: Timestamp (ms), Length: 237320, dtype: datetime64[ns]

In [16]:
# Re-read the first chest file (same read as in In [14]) so `data` is fresh
# for the cells below.
data = pd.read_csv(chest_files[0])

In [17]:
# Whole-second UTC datetime for every row of the loaded file.
# pd.to_datetime is vectorized and avoids datetime.utcfromtimestamp, which is
# deprecated since Python 3.12.
actual_datetime = pd.to_datetime(data["Timestamp (ms)"] // 1000, unit="s")

In [18]:
# Average the timestamps over windows of 31 rows, floor to whole seconds and
# convert to UTC datetimes to check that 31 rows span roughly one second.
# pd.to_datetime is vectorized and avoids datetime.utcfromtimestamp, which is
# deprecated since Python 3.12.
pd.to_datetime(data.groupby(data.index // 31).mean()["Timestamp (ms)"] // 1000, unit="s")

0      2017-10-04 01:44:01
1      2017-10-04 01:44:02
2      2017-10-04 01:44:03
3      2017-10-04 01:44:04
4      2017-10-04 01:44:05
               ...        
7651   2017-10-04 03:49:00
7652   2017-10-04 03:49:01
7653   2017-10-04 03:49:02
7654   2017-10-04 03:49:03
7655   2017-10-04 03:49:03
Name: Timestamp (ms), Length: 7656, dtype: datetime64[ns]

In [19]:
#Note - There are not exactly 31 observations per second, but 31 is the closest integer to number of observations per second
def biostamp_aggregate_files(files, output_dir, increment=31): #31 for 1s, 310 for 10s (exact value is probably closer to 308 or so, but for consistency we will use 310)
    """Average accel.csv files over fixed-size windows of rows.

    For each input file the rows are grouped into consecutive windows of
    ``increment`` rows and every column is averaged per window. The averaged
    "Timestamp (ms)" column is floored to whole seconds, converted to a UTC
    datetime stored in ``actual_datetime``, and then dropped. The result is
    written to ``output_dir`` as ``<a>_<b>_<c>_accel_AGGREGATE.csv``, where
    a, b, c are the 6th-, 5th- and 4th-from-last components of the file path
    (for the paths listed in this notebook: the three directory levels
    directly below ``BioStampRC``).

    Parameters
    ----------
    files : iterable of str
        Paths to accel.csv files containing a "Timestamp (ms)" column. Both
        "/" and "\\" are accepted as separators.
    output_dir : str
        Directory receiving the aggregated CSV files; created if missing.
    increment : int, default 31
        Number of consecutive rows averaged into one output row. The final
        window of a file may contain fewer rows.

    Raises
    ------
    ValueError
        If ``increment`` is not positive, or a path has fewer than six
        components so the output name cannot be derived.
    """
    if increment <= 0:
        raise ValueError("increment must be a positive number of rows")

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for file in files:
        # Derive the output name first so a malformed path fails before any
        # work is done. Counting components from the end does not depend on
        # which separators appear in the leading part of the path.
        parts = file.replace("\\", "/").split("/")
        if len(parts) < 6:
            raise ValueError("cannot derive an output name from path: " + file)
        out_name = "_".join(parts[-6:-3]) + "_accel_AGGREGATE.csv"

        data = pd.read_csv(file)

        agg = data.groupby(data.index // increment).mean()

        # Mean timestamp of the window, floored to whole seconds, as UTC.
        agg["actual_datetime"] = pd.to_datetime(agg["Timestamp (ms)"] // 1000, unit="s")
        agg = agg.drop(columns="Timestamp (ms)")

        agg.to_csv(os.path.join(output_dir, out_name), index=False)

In [20]:
# Aggregate every chest file with the default increment (31 rows per window)
# and write the results into output_chest_dir.
biostamp_aggregate_files(chest_files, output_chest_dir)

In [21]:
# Aggregate every thigh file with the default increment (31 rows per window)
# and write the results into output_thigh_dir.
biostamp_aggregate_files(thigh_files, output_thigh_dir)