In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import pathlib

%matplotlib inline

In [None]:
#---------------------------------------------------------------------------------------------------------
# Root folder of the raw data; each subject has its own sub-folder inside it
path = r'C:\Users\andre\Documents\DOD Data'
#---------------------------------------------------------------------------------------------------------

# newDataFlag controls how exported .csv files are written:
#   0 (default) - only write .csv files that do not exist yet
#   non-zero    - re-export everything, overwriting previously exported .csv files
newDataFlag = 0

# Activity labels (EventType values in annotations.csv) to extract for every subject
complete = ['MVC', 'Pavement', 'Crosswalk Test']

def extract_data(SubID, path):
    """Split one subject's raw sensor recordings into per-activity, per-trial windows.

    Reads ``annotations.csv`` from ``path``, merges every accelerometer, gyroscope and
    EMG/ECG .csv found below each sensor sub-folder, cuts the merged recordings to the
    start/stop timestamps of every annotated trial, hands the result to ``export_data``
    and returns it.

    Inputs: SubID - string identifying the subject (forwarded to ``export_data``)
            path - system path to the subject's raw data folder

    Outputs: act_dict - nested dictionary indexed as
                 act_dict[activity][trial][sensor_location][kind]
             ``activity`` is taken from the module-level ``complete`` list, ``trial`` is a
             0-based counter over that activity's rows in annotations.csv and ``kind`` is
             'accel', 'gyro' or 'emg'. Every leaf is a DataFrame indexed by
             'Timestamp (ms)' (empty when the sensor has no data of that kind). An
             activity without any annotation row maps to an empty dictionary.
    """
    kinds = ('accel', 'gyro', 'emg')

    timestamps = pd.read_csv(os.path.join(path, 'annotations.csv')).set_index('EventType')
    timestamps = timestamps.drop(columns=['Timestamp (ms)', 'AnnotationId', 'AuthorId'])

    # Sensor locations are the folders directly inside the subject's raw data directory
    locations = [entry for entry in os.listdir(path) if os.path.isdir(os.path.join(path, entry))]

    # Collect the raw frames per kind and sensor first; they are concatenated once below
    # (DataFrame.append no longer exists in pandas 2.x and was quadratic in a loop anyway)
    pieces = {kind: {loc: [] for loc in locations} for kind in kinds}

    for root, _dirs, files in os.walk(path, topdown=True):
        for filename in files:
            if filename.endswith('accel.csv'):
                kind = 'accel'
            elif filename.endswith('gyro.csv'):
                kind = 'gyro'
            elif filename.endswith(('elec.csv', 'emg.csv')):
                kind = 'emg'
            else:
                continue

            file_path = pathlib.Path(root, filename)
            # .parts is separator-agnostic, unlike splitting the string on a backslash
            rel_parts = file_path.relative_to(path).parts
            if len(rel_parts) < 2:
                # File sits directly in the subject folder, so it belongs to no sensor
                continue

            frame = pd.read_csv(file_path).set_index('Timestamp (ms)')
            pieces[kind][rel_parts[0]].append(frame)

    # One merged recording per kind and sensor, datetime index retained
    merged = {
        kind: {
            loc: pd.concat(frames) if frames else pd.DataFrame()
            for loc, frames in by_location.items()
        }
        for kind, by_location in pieces.items()
    }

    # Populate the activity dictionary with every iteration / trial
    act_dict = {}
    for activity in complete:
        # A boolean mask always yields a frame, whether the activity has zero, one or
        # many annotation rows (label lookup returns a scalar for exactly one row)
        rows = timestamps.loc[timestamps.index == activity]
        starts = rows['Start Timestamp (ms)'].values
        stops = rows['Stop Timestamp (ms)'].values

        trial_dict = {}
        for trial, (start, stop) in enumerate(zip(starts, stops)):
            trial_dict[trial] = {
                loc: {kind: _slice_window(merged[kind][loc], start, stop) for kind in kinds}
                for loc in locations
            }

        act_dict[activity] = trial_dict

    export_data(act_dict, path, SubID, newDataFlag)

    return act_dict


def _slice_window(frame, start, stop):
    """Return the rows of ``frame`` whose index lies in [start, stop] (both inclusive)."""
    if frame.empty:
        return pd.DataFrame()
    return frame[(frame.index >= start) & (frame.index <= stop)]

def plot_data(acts, activity, trial, sensor, data, start=0, end=500):
    """Plot a positional slice of one sensor recording from a single trial.

    Inputs: acts - nested dictionary of one subject, indexed as
                   acts[activity][trial][sensor][data]
            activity - desired activity to plot
            trial - desired trial number to plot
            sensor - desired sensor (serial number or name of location) to analyze
            data - desired type of data to analyze ('accel', 'gyro' or 'emg')
            start - position of the first sample to plot, default is the first sample
            end - position one past the last sample to plot, default is 500
    """
    # Look the recording up directly instead of wrapping the nested dictionaries
    # in a temporary DataFrame just to read one cell back out of it
    recording = acts[activity][trial][sensor][data]

    # .iloc keeps the slice positional even if the timestamp index is not integer-typed
    recording.iloc[start:end].plot()
    

def export_data(act_data, path, SubID, newDataFlag):
    """Write every non-empty recording of a subject to its own .csv file.

    Files are written to TimeSplitData/<SubID>/<activity>/<sensor>/<kind>_<trial>.csv,
    relative to the current working directory.

    Inputs: act_data - nested dictionary act_data[activity][trial][sensor][kind] of DataFrames
            path - subject folder path; accepted for the caller's convenience, not used here
            SubID - subject ID, used as a folder name in the output tree
            newDataFlag - truthy: always write and overwrite existing files;
                          falsy: only write files that do not exist yet

    Outputs: .csv files on disk; nothing is returned
    """
    for activity, trial_map in act_data.items():
        for trial, sensor_map in trial_map.items():
            for sensor, kind_map in sensor_map.items():
                for kind, frame in kind_map.items():

                    # Nothing recorded for this sensor / kind in this trial
                    if frame.empty:
                        continue

                    out_dir = os.path.join('TimeSplitData', SubID, activity, sensor)
                    os.makedirs(out_dir, exist_ok=True)

                    target = os.path.join(out_dir, '{}_{}.csv'.format(kind, trial))

                    # Existing exports are only replaced when a re-export was requested
                    if newDataFlag or not os.path.exists(target):
                        frame.to_csv(target)

In [None]:
# Map every subject ID (lower-cased folder name) to the folder's real name on disk
subject_folders = {
    IDs.lower(): IDs
    for IDs in os.listdir(path)
    if os.path.isdir(os.path.join(path, IDs)) and 'pka' in IDs
}

Subjects = {}

for ID, folder in subject_folders.items():
    # Build the path from the real folder name: the lower-cased ID only resolves
    # to the same folder on case-insensitive file systems
    act_dict = extract_data(ID, os.path.join(path, folder))

    Subjects[ID] = act_dict

In [None]:
# Subject pka03: accelerometer trace of the first Pavement trial, sensor d14spk18
acts = Subjects['pka03']
plot_data(acts, activity='Pavement', trial=0, sensor='d14spk18', data='accel')

In [None]:
# Accelerometer trace of the second Pavement trial (index 1), sensor d5la7wyb
plot_data(acts, activity='Pavement', trial=1, sensor='d5la7wyb', data='accel')

In [None]:
# EMG/ECG trace of the first Pavement trial, sensor d5la7wz0
plot_data(acts, activity='Pavement', trial=0, sensor='d5la7wz0', data='emg')

In [None]:
# EMG/ECG trace of the second Pavement trial (index 1), sensor d5la7wz0
plot_data(acts, activity='Pavement', trial=1, sensor='d5la7wz0', data='emg')

In [None]:
# Show which subjects and activities were loaded; echoing Subjects itself would
# print every nested DataFrame
{subject: list(activities) for subject, activities in Subjects.items()}